|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 6807, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00044072278536800354, |
|
"grad_norm": 0.24354467550812778, |
|
"learning_rate": 2.936857562408223e-07, |
|
"loss": 2.1339, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0022036139268400176, |
|
"grad_norm": 0.27040776216996587, |
|
"learning_rate": 1.4684287812041115e-06, |
|
"loss": 2.3535, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004407227853680035, |
|
"grad_norm": 0.23978713638500837, |
|
"learning_rate": 2.936857562408223e-06, |
|
"loss": 2.0659, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006610841780520053, |
|
"grad_norm": 0.2562898248528363, |
|
"learning_rate": 4.4052863436123355e-06, |
|
"loss": 2.0742, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00881445570736007, |
|
"grad_norm": 0.2671940707327157, |
|
"learning_rate": 5.873715124816446e-06, |
|
"loss": 2.0397, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011018069634200088, |
|
"grad_norm": 0.2840547143688825, |
|
"learning_rate": 7.3421439060205585e-06, |
|
"loss": 2.3034, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013221683561040106, |
|
"grad_norm": 0.2739646963997833, |
|
"learning_rate": 8.810572687224671e-06, |
|
"loss": 2.0168, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.015425297487880123, |
|
"grad_norm": 0.36046684768779125, |
|
"learning_rate": 1.0279001468428782e-05, |
|
"loss": 2.1073, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01762891141472014, |
|
"grad_norm": 0.29214076993878346, |
|
"learning_rate": 1.1747430249632892e-05, |
|
"loss": 1.9053, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01983252534156016, |
|
"grad_norm": 0.4413639637555924, |
|
"learning_rate": 1.3215859030837005e-05, |
|
"loss": 2.272, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.022036139268400177, |
|
"grad_norm": 0.3645215252816923, |
|
"learning_rate": 1.4684287812041117e-05, |
|
"loss": 2.0033, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024239753195240195, |
|
"grad_norm": 0.26241461964383006, |
|
"learning_rate": 1.615271659324523e-05, |
|
"loss": 2.0168, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026443367122080213, |
|
"grad_norm": 0.3108898378679939, |
|
"learning_rate": 1.7621145374449342e-05, |
|
"loss": 2.1111, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02864698104892023, |
|
"grad_norm": 0.2662326669897319, |
|
"learning_rate": 1.9089574155653454e-05, |
|
"loss": 1.8806, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.030850594975760245, |
|
"grad_norm": 0.16963519927101947, |
|
"learning_rate": 2.0558002936857563e-05, |
|
"loss": 2.1257, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03305420890260027, |
|
"grad_norm": 0.23310716143802493, |
|
"learning_rate": 2.2026431718061676e-05, |
|
"loss": 1.8759, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03525782282944028, |
|
"grad_norm": 0.2575993518919339, |
|
"learning_rate": 2.3494860499265785e-05, |
|
"loss": 1.9053, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0374614367562803, |
|
"grad_norm": 0.2746073869104198, |
|
"learning_rate": 2.4963289280469897e-05, |
|
"loss": 1.7651, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03966505068312032, |
|
"grad_norm": 0.2279037029835098, |
|
"learning_rate": 2.643171806167401e-05, |
|
"loss": 1.8627, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04186866460996033, |
|
"grad_norm": 0.27296809706241965, |
|
"learning_rate": 2.7900146842878122e-05, |
|
"loss": 1.9057, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.044072278536800354, |
|
"grad_norm": 0.2670117144218064, |
|
"learning_rate": 2.9368575624082234e-05, |
|
"loss": 1.8911, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04627589246364037, |
|
"grad_norm": 0.23511621219550868, |
|
"learning_rate": 3.0837004405286347e-05, |
|
"loss": 2.024, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04847950639048039, |
|
"grad_norm": 0.24414856722360948, |
|
"learning_rate": 3.230543318649046e-05, |
|
"loss": 1.9756, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.050683120317320404, |
|
"grad_norm": 0.25077238640232496, |
|
"learning_rate": 3.377386196769457e-05, |
|
"loss": 1.7627, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.052886734244160426, |
|
"grad_norm": 0.30541323094735956, |
|
"learning_rate": 3.5242290748898684e-05, |
|
"loss": 1.9455, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05509034817100044, |
|
"grad_norm": 0.2593735959377697, |
|
"learning_rate": 3.6710719530102796e-05, |
|
"loss": 1.9408, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05729396209784046, |
|
"grad_norm": 0.2356435173973878, |
|
"learning_rate": 3.817914831130691e-05, |
|
"loss": 1.9746, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.059497576024680476, |
|
"grad_norm": 0.2556575372905199, |
|
"learning_rate": 3.9647577092511014e-05, |
|
"loss": 1.9133, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06170118995152049, |
|
"grad_norm": 0.23671492386855494, |
|
"learning_rate": 4.1116005873715127e-05, |
|
"loss": 1.7228, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0639048038783605, |
|
"grad_norm": 0.2522128793647996, |
|
"learning_rate": 4.258443465491924e-05, |
|
"loss": 1.9416, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.06610841780520053, |
|
"grad_norm": 0.259439737149894, |
|
"learning_rate": 4.405286343612335e-05, |
|
"loss": 2.0295, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06831203173204055, |
|
"grad_norm": 0.2632402024665515, |
|
"learning_rate": 4.5521292217327464e-05, |
|
"loss": 1.7906, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.07051564565888056, |
|
"grad_norm": 0.28869506877919027, |
|
"learning_rate": 4.698972099853157e-05, |
|
"loss": 1.73, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07271925958572058, |
|
"grad_norm": 0.2586750438465526, |
|
"learning_rate": 4.845814977973568e-05, |
|
"loss": 1.8038, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0749228735125606, |
|
"grad_norm": 0.2506125082680772, |
|
"learning_rate": 4.9926578560939794e-05, |
|
"loss": 1.8558, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07712648743940062, |
|
"grad_norm": 0.2876243539987618, |
|
"learning_rate": 5.1395007342143906e-05, |
|
"loss": 1.9784, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07933010136624064, |
|
"grad_norm": 0.23288536708042795, |
|
"learning_rate": 5.286343612334802e-05, |
|
"loss": 1.7534, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08153371529308065, |
|
"grad_norm": 0.25062968804040664, |
|
"learning_rate": 5.433186490455213e-05, |
|
"loss": 1.8032, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08373732921992066, |
|
"grad_norm": 0.3358849445093002, |
|
"learning_rate": 5.5800293685756244e-05, |
|
"loss": 1.9325, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08594094314676069, |
|
"grad_norm": 0.25234625121100573, |
|
"learning_rate": 5.7268722466960356e-05, |
|
"loss": 1.8855, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.08814455707360071, |
|
"grad_norm": 0.3365993483895123, |
|
"learning_rate": 5.873715124816447e-05, |
|
"loss": 1.8118, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09034817100044072, |
|
"grad_norm": 0.311599846916865, |
|
"learning_rate": 6.020558002936858e-05, |
|
"loss": 2.0721, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09255178492728074, |
|
"grad_norm": 0.3363293627242514, |
|
"learning_rate": 6.167400881057269e-05, |
|
"loss": 1.804, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09475539885412076, |
|
"grad_norm": 0.3414057038117249, |
|
"learning_rate": 6.31424375917768e-05, |
|
"loss": 1.871, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.09695901278096078, |
|
"grad_norm": 0.3051539417193126, |
|
"learning_rate": 6.461086637298092e-05, |
|
"loss": 2.0932, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0991626267078008, |
|
"grad_norm": 0.2859223655623353, |
|
"learning_rate": 6.607929515418503e-05, |
|
"loss": 1.8641, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10136624063464081, |
|
"grad_norm": 0.30959368455808256, |
|
"learning_rate": 6.754772393538914e-05, |
|
"loss": 1.8437, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10356985456148082, |
|
"grad_norm": 0.3544528067642694, |
|
"learning_rate": 6.901615271659326e-05, |
|
"loss": 1.9002, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.10577346848832085, |
|
"grad_norm": 0.34632827275641465, |
|
"learning_rate": 7.048458149779737e-05, |
|
"loss": 1.8398, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.10797708241516087, |
|
"grad_norm": 0.597473977518495, |
|
"learning_rate": 7.195301027900148e-05, |
|
"loss": 1.9357, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.11018069634200088, |
|
"grad_norm": 0.35953743798846133, |
|
"learning_rate": 7.342143906020559e-05, |
|
"loss": 1.944, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1123843102688409, |
|
"grad_norm": 0.32165430097730124, |
|
"learning_rate": 7.48898678414097e-05, |
|
"loss": 1.8974, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11458792419568092, |
|
"grad_norm": 0.2616578656705798, |
|
"learning_rate": 7.635829662261382e-05, |
|
"loss": 1.6988, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11679153812252094, |
|
"grad_norm": 0.37507900741140277, |
|
"learning_rate": 7.782672540381793e-05, |
|
"loss": 1.9061, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.11899515204936095, |
|
"grad_norm": 0.3505810167092092, |
|
"learning_rate": 7.929515418502203e-05, |
|
"loss": 1.7829, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12119876597620097, |
|
"grad_norm": 0.2905187770243884, |
|
"learning_rate": 8.076358296622614e-05, |
|
"loss": 1.896, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12340237990304098, |
|
"grad_norm": 0.31667074390598227, |
|
"learning_rate": 8.223201174743025e-05, |
|
"loss": 1.8549, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.125605993829881, |
|
"grad_norm": 0.33256810111534474, |
|
"learning_rate": 8.370044052863437e-05, |
|
"loss": 1.9184, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.127809607756721, |
|
"grad_norm": 0.41094013265883617, |
|
"learning_rate": 8.516886930983848e-05, |
|
"loss": 1.9407, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13001322168356105, |
|
"grad_norm": 0.3268179907564926, |
|
"learning_rate": 8.663729809104259e-05, |
|
"loss": 1.858, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.13221683561040107, |
|
"grad_norm": 0.4082234533551937, |
|
"learning_rate": 8.81057268722467e-05, |
|
"loss": 1.8172, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13442044953724108, |
|
"grad_norm": 0.3303954200472522, |
|
"learning_rate": 8.957415565345081e-05, |
|
"loss": 1.7879, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.1366240634640811, |
|
"grad_norm": 0.3785434588284624, |
|
"learning_rate": 9.104258443465493e-05, |
|
"loss": 1.7931, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1388276773909211, |
|
"grad_norm": 0.3603897034728447, |
|
"learning_rate": 9.251101321585903e-05, |
|
"loss": 1.7793, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.14103129131776113, |
|
"grad_norm": 0.2967904662976913, |
|
"learning_rate": 9.397944199706314e-05, |
|
"loss": 1.7092, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14323490524460114, |
|
"grad_norm": 0.2976559195453983, |
|
"learning_rate": 9.544787077826725e-05, |
|
"loss": 1.8302, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14543851917144116, |
|
"grad_norm": 0.34525724393698276, |
|
"learning_rate": 9.691629955947136e-05, |
|
"loss": 1.9488, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.14764213309828117, |
|
"grad_norm": 0.3473487500731534, |
|
"learning_rate": 9.838472834067548e-05, |
|
"loss": 1.8169, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.1498457470251212, |
|
"grad_norm": 0.35776737697830296, |
|
"learning_rate": 9.985315712187959e-05, |
|
"loss": 1.7562, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15204936095196123, |
|
"grad_norm": 0.35067754753032443, |
|
"learning_rate": 0.00010132158590308371, |
|
"loss": 1.9597, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.15425297487880124, |
|
"grad_norm": 0.31111407532049806, |
|
"learning_rate": 0.00010279001468428781, |
|
"loss": 1.6977, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15645658880564126, |
|
"grad_norm": 0.350081092372353, |
|
"learning_rate": 0.00010425844346549194, |
|
"loss": 1.7585, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.15866020273248127, |
|
"grad_norm": 0.3445787779571548, |
|
"learning_rate": 0.00010572687224669604, |
|
"loss": 1.7956, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.16086381665932128, |
|
"grad_norm": 0.2992117867602888, |
|
"learning_rate": 0.00010719530102790014, |
|
"loss": 1.7752, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.1630674305861613, |
|
"grad_norm": 0.3754242349038873, |
|
"learning_rate": 0.00010866372980910426, |
|
"loss": 1.8151, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.16527104451300131, |
|
"grad_norm": 0.36786720092684805, |
|
"learning_rate": 0.00011013215859030836, |
|
"loss": 1.8172, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.16747465843984133, |
|
"grad_norm": 0.3572694028170277, |
|
"learning_rate": 0.00011160058737151249, |
|
"loss": 1.8668, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16967827236668137, |
|
"grad_norm": 0.34400588996372305, |
|
"learning_rate": 0.00011306901615271659, |
|
"loss": 1.9685, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17188188629352139, |
|
"grad_norm": 0.3484332919699417, |
|
"learning_rate": 0.00011453744493392071, |
|
"loss": 1.999, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1740855002203614, |
|
"grad_norm": 0.3167720096524829, |
|
"learning_rate": 0.00011600587371512481, |
|
"loss": 1.7002, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.17628911414720141, |
|
"grad_norm": 0.30865191358312394, |
|
"learning_rate": 0.00011747430249632894, |
|
"loss": 1.7808, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17849272807404143, |
|
"grad_norm": 0.3635805195870158, |
|
"learning_rate": 0.00011894273127753304, |
|
"loss": 1.8711, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.18069634200088144, |
|
"grad_norm": 0.3113426681083048, |
|
"learning_rate": 0.00012041116005873716, |
|
"loss": 1.7559, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18289995592772146, |
|
"grad_norm": 0.3500754402872819, |
|
"learning_rate": 0.00012187958883994126, |
|
"loss": 2.0623, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.18510356985456147, |
|
"grad_norm": 0.3002611868025747, |
|
"learning_rate": 0.00012334801762114539, |
|
"loss": 1.6155, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1873071837814015, |
|
"grad_norm": 0.30728017118253237, |
|
"learning_rate": 0.00012481644640234947, |
|
"loss": 1.7865, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.18951079770824153, |
|
"grad_norm": 0.30435853108098426, |
|
"learning_rate": 0.0001262848751835536, |
|
"loss": 1.8101, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19171441163508154, |
|
"grad_norm": 0.33728305221904875, |
|
"learning_rate": 0.0001277533039647577, |
|
"loss": 1.7445, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.19391802556192156, |
|
"grad_norm": 0.3269188152820616, |
|
"learning_rate": 0.00012922173274596184, |
|
"loss": 1.8443, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19612163948876157, |
|
"grad_norm": 0.30824761799725153, |
|
"learning_rate": 0.00013069016152716592, |
|
"loss": 1.8385, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.1983252534156016, |
|
"grad_norm": 0.2692779779339547, |
|
"learning_rate": 0.00013215859030837006, |
|
"loss": 1.7256, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2005288673424416, |
|
"grad_norm": 0.29633902366885784, |
|
"learning_rate": 0.00013362701908957415, |
|
"loss": 1.8496, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.20273248126928162, |
|
"grad_norm": 0.31921033190555953, |
|
"learning_rate": 0.00013509544787077829, |
|
"loss": 1.9118, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20493609519612163, |
|
"grad_norm": 0.38557464681376724, |
|
"learning_rate": 0.00013656387665198237, |
|
"loss": 1.7992, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.20713970912296165, |
|
"grad_norm": 0.28175106471056965, |
|
"learning_rate": 0.0001380323054331865, |
|
"loss": 1.8768, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2093433230498017, |
|
"grad_norm": 0.31789045276124145, |
|
"learning_rate": 0.0001395007342143906, |
|
"loss": 1.7628, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2115469369766417, |
|
"grad_norm": 0.2412861323934925, |
|
"learning_rate": 0.00014096916299559473, |
|
"loss": 1.7988, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21375055090348172, |
|
"grad_norm": 0.24858109179997467, |
|
"learning_rate": 0.00014243759177679882, |
|
"loss": 1.8673, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.21595416483032173, |
|
"grad_norm": 0.30755969433310765, |
|
"learning_rate": 0.00014390602055800296, |
|
"loss": 1.7105, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21815777875716175, |
|
"grad_norm": 0.2249398504391689, |
|
"learning_rate": 0.00014537444933920705, |
|
"loss": 1.7837, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.22036139268400176, |
|
"grad_norm": 0.3335603893570301, |
|
"learning_rate": 0.00014684287812041118, |
|
"loss": 1.8333, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22256500661084178, |
|
"grad_norm": 0.3038176355961198, |
|
"learning_rate": 0.00014831130690161527, |
|
"loss": 1.8275, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2247686205376818, |
|
"grad_norm": 0.30793332193972345, |
|
"learning_rate": 0.0001497797356828194, |
|
"loss": 1.8177, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2269722344645218, |
|
"grad_norm": 0.3072544700688745, |
|
"learning_rate": 0.0001512481644640235, |
|
"loss": 1.9828, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.22917584839136185, |
|
"grad_norm": 0.34999895392965585, |
|
"learning_rate": 0.00015271659324522763, |
|
"loss": 1.9117, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.23137946231820186, |
|
"grad_norm": 0.32911766969763456, |
|
"learning_rate": 0.00015418502202643172, |
|
"loss": 1.6697, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23358307624504188, |
|
"grad_norm": 0.2617607674469746, |
|
"learning_rate": 0.00015565345080763586, |
|
"loss": 1.6296, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2357866901718819, |
|
"grad_norm": 0.43182139308340584, |
|
"learning_rate": 0.00015712187958883994, |
|
"loss": 1.9794, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.2379903040987219, |
|
"grad_norm": 0.3127396374267501, |
|
"learning_rate": 0.00015859030837004406, |
|
"loss": 1.7436, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.24019391802556192, |
|
"grad_norm": 0.21415922561789702, |
|
"learning_rate": 0.00016005873715124817, |
|
"loss": 1.7742, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24239753195240193, |
|
"grad_norm": 0.31852170762507637, |
|
"learning_rate": 0.00016152716593245228, |
|
"loss": 1.7969, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24460114587924195, |
|
"grad_norm": 0.29928941424884764, |
|
"learning_rate": 0.0001629955947136564, |
|
"loss": 1.8178, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.24680475980608196, |
|
"grad_norm": 0.2145836014852306, |
|
"learning_rate": 0.0001644640234948605, |
|
"loss": 1.6149, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.249008373732922, |
|
"grad_norm": 0.2765877443446801, |
|
"learning_rate": 0.00016593245227606462, |
|
"loss": 1.859, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.251211987659762, |
|
"grad_norm": 0.26872348364190873, |
|
"learning_rate": 0.00016740088105726873, |
|
"loss": 1.8757, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.25341560158660204, |
|
"grad_norm": 0.3149952835357651, |
|
"learning_rate": 0.00016886930983847284, |
|
"loss": 1.869, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.255619215513442, |
|
"grad_norm": 0.2304679465113612, |
|
"learning_rate": 0.00017033773861967696, |
|
"loss": 1.7151, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.25782282944028206, |
|
"grad_norm": 0.24264862789474126, |
|
"learning_rate": 0.00017180616740088107, |
|
"loss": 2.0695, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.2600264433671221, |
|
"grad_norm": 0.2598067252093709, |
|
"learning_rate": 0.00017327459618208518, |
|
"loss": 1.839, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2622300572939621, |
|
"grad_norm": 0.26705538323533523, |
|
"learning_rate": 0.0001747430249632893, |
|
"loss": 1.8008, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.26443367122080214, |
|
"grad_norm": 0.28479994342274373, |
|
"learning_rate": 0.0001762114537444934, |
|
"loss": 1.7284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2666372851476421, |
|
"grad_norm": 0.28497317726898896, |
|
"learning_rate": 0.00017767988252569752, |
|
"loss": 1.9214, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.26884089907448216, |
|
"grad_norm": 0.33639476801612694, |
|
"learning_rate": 0.00017914831130690163, |
|
"loss": 1.7516, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.27104451300132215, |
|
"grad_norm": 0.25526934350033054, |
|
"learning_rate": 0.00018061674008810574, |
|
"loss": 1.7688, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.2732481269281622, |
|
"grad_norm": 0.28869758623973935, |
|
"learning_rate": 0.00018208516886930985, |
|
"loss": 1.7843, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2754517408550022, |
|
"grad_norm": 0.2782854068624019, |
|
"learning_rate": 0.00018355359765051397, |
|
"loss": 1.8081, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2776553547818422, |
|
"grad_norm": 0.28975241284668296, |
|
"learning_rate": 0.00018502202643171805, |
|
"loss": 2.0243, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.27985896870868227, |
|
"grad_norm": 0.3308791482681284, |
|
"learning_rate": 0.0001864904552129222, |
|
"loss": 1.9301, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.28206258263552225, |
|
"grad_norm": 0.2725800531393519, |
|
"learning_rate": 0.00018795888399412628, |
|
"loss": 1.8126, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2842661965623623, |
|
"grad_norm": 0.2347435274751105, |
|
"learning_rate": 0.00018942731277533042, |
|
"loss": 1.9526, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.2864698104892023, |
|
"grad_norm": 0.2594593727241724, |
|
"learning_rate": 0.0001908957415565345, |
|
"loss": 1.7362, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2886734244160423, |
|
"grad_norm": 0.20428739622605385, |
|
"learning_rate": 0.00019236417033773864, |
|
"loss": 1.9143, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.2908770383428823, |
|
"grad_norm": 0.35231366470248277, |
|
"learning_rate": 0.00019383259911894273, |
|
"loss": 1.882, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.29308065226972235, |
|
"grad_norm": 0.28303949235132847, |
|
"learning_rate": 0.00019530102790014687, |
|
"loss": 1.9956, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.29528426619656234, |
|
"grad_norm": 0.28380137271318484, |
|
"learning_rate": 0.00019676945668135095, |
|
"loss": 1.9298, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2974878801234024, |
|
"grad_norm": 0.29197603939206423, |
|
"learning_rate": 0.0001982378854625551, |
|
"loss": 1.9936, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.2996914940502424, |
|
"grad_norm": 0.3192279737304851, |
|
"learning_rate": 0.00019970631424375918, |
|
"loss": 1.856, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3018951079770824, |
|
"grad_norm": 0.31555391042987874, |
|
"learning_rate": 0.00019999978960491256, |
|
"loss": 1.9492, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.30409872190392245, |
|
"grad_norm": 0.26468213545329267, |
|
"learning_rate": 0.0001999989348763872, |
|
"loss": 1.867, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.30630233583076244, |
|
"grad_norm": 0.2591553540599883, |
|
"learning_rate": 0.0001999974226703463, |
|
"loss": 1.7565, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.3085059497576025, |
|
"grad_norm": 0.33857027825097774, |
|
"learning_rate": 0.00019999525299673244, |
|
"loss": 1.8407, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31070956368444247, |
|
"grad_norm": 0.27554046406812405, |
|
"learning_rate": 0.0001999924258698108, |
|
"loss": 1.8449, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3129131776112825, |
|
"grad_norm": 0.2906036501277061, |
|
"learning_rate": 0.0001999889413081694, |
|
"loss": 1.9425, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3151167915381225, |
|
"grad_norm": 0.24516499633235853, |
|
"learning_rate": 0.00019998479933471862, |
|
"loss": 1.8373, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.31732040546496254, |
|
"grad_norm": 0.261325768775238, |
|
"learning_rate": 0.0001999799999766913, |
|
"loss": 1.8655, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3195240193918026, |
|
"grad_norm": 0.26292913191780404, |
|
"learning_rate": 0.00019997454326564252, |
|
"loss": 1.8011, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.32172763331864257, |
|
"grad_norm": 0.24941214673161538, |
|
"learning_rate": 0.0001999684292374493, |
|
"loss": 1.7063, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3239312472454826, |
|
"grad_norm": 0.285047883815678, |
|
"learning_rate": 0.00019996165793231038, |
|
"loss": 1.9537, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3261348611723226, |
|
"grad_norm": 0.2853612548244496, |
|
"learning_rate": 0.0001999542293947461, |
|
"loss": 1.7641, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.32833847509916264, |
|
"grad_norm": 0.2600200330765211, |
|
"learning_rate": 0.00019994614367359792, |
|
"loss": 1.8886, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.33054208902600263, |
|
"grad_norm": 0.36999348537839605, |
|
"learning_rate": 0.00019993740082202818, |
|
"loss": 1.798, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.33274570295284267, |
|
"grad_norm": 0.24647791783433215, |
|
"learning_rate": 0.00019992800089751984, |
|
"loss": 1.8922, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.33494931687968266, |
|
"grad_norm": 0.23414809097297196, |
|
"learning_rate": 0.0001999179439618759, |
|
"loss": 1.6675, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3371529308065227, |
|
"grad_norm": 0.24171495213943817, |
|
"learning_rate": 0.00019990723008121917, |
|
"loss": 1.5054, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.33935654473336274, |
|
"grad_norm": 0.2462163957540956, |
|
"learning_rate": 0.00019989585932599172, |
|
"loss": 1.8441, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.34156015866020273, |
|
"grad_norm": 0.24192478872496498, |
|
"learning_rate": 0.00019988383177095459, |
|
"loss": 1.8535, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.34376377258704277, |
|
"grad_norm": 0.21554799200025276, |
|
"learning_rate": 0.000199871147495187, |
|
"loss": 1.7299, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.34596738651388276, |
|
"grad_norm": 0.29240426650240625, |
|
"learning_rate": 0.00019985780658208618, |
|
"loss": 1.9846, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.3481710004407228, |
|
"grad_norm": 0.32582827846868756, |
|
"learning_rate": 0.00019984380911936648, |
|
"loss": 1.5922, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3503746143675628, |
|
"grad_norm": 0.3253623043496682, |
|
"learning_rate": 0.00019982915519905912, |
|
"loss": 1.7138, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.35257822829440283, |
|
"grad_norm": 0.3012253411690825, |
|
"learning_rate": 0.00019981384491751133, |
|
"loss": 1.9526, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3547818422212428, |
|
"grad_norm": 0.32373710599771705, |
|
"learning_rate": 0.00019979787837538587, |
|
"loss": 2.0799, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.35698545614808286, |
|
"grad_norm": 0.2736459891711902, |
|
"learning_rate": 0.00019978125567766023, |
|
"loss": 1.8422, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3591890700749229, |
|
"grad_norm": 0.28970333253517644, |
|
"learning_rate": 0.00019976397693362614, |
|
"loss": 1.8309, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3613926840017629, |
|
"grad_norm": 0.271249926943477, |
|
"learning_rate": 0.0001997460422568886, |
|
"loss": 1.6581, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.36359629792860293, |
|
"grad_norm": 0.25582927997161137, |
|
"learning_rate": 0.00019972745176536537, |
|
"loss": 1.9441, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3657999118554429, |
|
"grad_norm": 0.27987061803178176, |
|
"learning_rate": 0.00019970820558128604, |
|
"loss": 1.8015, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.36800352578228296, |
|
"grad_norm": 0.2956257318754398, |
|
"learning_rate": 0.0001996883038311913, |
|
"loss": 1.7853, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.37020713970912295, |
|
"grad_norm": 0.25727871315384043, |
|
"learning_rate": 0.00019966774664593206, |
|
"loss": 1.7594, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.372410753635963, |
|
"grad_norm": 0.3069722747545403, |
|
"learning_rate": 0.00019964653416066868, |
|
"loss": 1.9102, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.374614367562803, |
|
"grad_norm": 0.362312858514461, |
|
"learning_rate": 0.0001996246665148699, |
|
"loss": 1.8419, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.376817981489643, |
|
"grad_norm": 0.30043599837768675, |
|
"learning_rate": 0.00019960214385231217, |
|
"loss": 1.9281, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.37902159541648306, |
|
"grad_norm": 0.25742900200071334, |
|
"learning_rate": 0.00019957896632107845, |
|
"loss": 1.8382, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.38122520934332305, |
|
"grad_norm": 0.22904042108335546, |
|
"learning_rate": 0.00019955513407355743, |
|
"loss": 1.585, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.3834288232701631, |
|
"grad_norm": 0.35248924984262536, |
|
"learning_rate": 0.0001995306472664425, |
|
"loss": 1.8779, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3856324371970031, |
|
"grad_norm": 0.21394107686214808, |
|
"learning_rate": 0.00019950550606073056, |
|
"loss": 1.6203, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3878360511238431, |
|
"grad_norm": 0.23210191109497874, |
|
"learning_rate": 0.00019947971062172118, |
|
"loss": 1.7579, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3900396650506831, |
|
"grad_norm": 0.280197585305159, |
|
"learning_rate": 0.00019945326111901542, |
|
"loss": 1.8697, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.39224327897752315, |
|
"grad_norm": 0.2007938963214883, |
|
"learning_rate": 0.00019942615772651455, |
|
"loss": 1.6718, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.39444689290436313, |
|
"grad_norm": 0.28396842059026434, |
|
"learning_rate": 0.0001993984006224193, |
|
"loss": 1.8261, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.3966505068312032, |
|
"grad_norm": 0.2970710682132302, |
|
"learning_rate": 0.00019936998998922826, |
|
"loss": 1.8988, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3988541207580432, |
|
"grad_norm": 0.4794659287163716, |
|
"learning_rate": 0.00019934092601373694, |
|
"loss": 1.8387, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4010577346848832, |
|
"grad_norm": 0.2810211439045603, |
|
"learning_rate": 0.00019931120888703652, |
|
"loss": 1.7516, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.40326134861172325, |
|
"grad_norm": 0.28984779802870314, |
|
"learning_rate": 0.0001992808388045125, |
|
"loss": 1.9212, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.40546496253856323, |
|
"grad_norm": 0.26294811280148017, |
|
"learning_rate": 0.00019924981596584345, |
|
"loss": 1.8798, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4076685764654033, |
|
"grad_norm": 0.27546864810479915, |
|
"learning_rate": 0.00019921814057499978, |
|
"loss": 1.7595, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.40987219039224326, |
|
"grad_norm": 0.2506019656377615, |
|
"learning_rate": 0.0001991858128402422, |
|
"loss": 1.7625, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4120758043190833, |
|
"grad_norm": 0.35470941281384444, |
|
"learning_rate": 0.0001991528329741206, |
|
"loss": 1.8644, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.4142794182459233, |
|
"grad_norm": 0.30456048523132856, |
|
"learning_rate": 0.00019911920119347254, |
|
"loss": 1.8427, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.41648303217276333, |
|
"grad_norm": 0.21180617855630868, |
|
"learning_rate": 0.0001990849177194217, |
|
"loss": 1.947, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.4186866460996034, |
|
"grad_norm": 0.27175217869149726, |
|
"learning_rate": 0.00019904998277737668, |
|
"loss": 1.5794, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.42089026002644336, |
|
"grad_norm": 0.25340188979799705, |
|
"learning_rate": 0.00019901439659702924, |
|
"loss": 1.655, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.4230938739532834, |
|
"grad_norm": 0.32039092541913483, |
|
"learning_rate": 0.00019897815941235307, |
|
"loss": 1.9448, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4252974878801234, |
|
"grad_norm": 0.28203099326236186, |
|
"learning_rate": 0.00019894127146160204, |
|
"loss": 1.73, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.42750110180696343, |
|
"grad_norm": 0.790698073004524, |
|
"learning_rate": 0.00019890373298730868, |
|
"loss": 2.0466, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4297047157338034, |
|
"grad_norm": 0.23107801878483133, |
|
"learning_rate": 0.0001988655442362827, |
|
"loss": 1.5805, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.43190832966064346, |
|
"grad_norm": 0.2396293478769625, |
|
"learning_rate": 0.00019882670545960914, |
|
"loss": 1.7482, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.43411194358748345, |
|
"grad_norm": 0.29159214942433453, |
|
"learning_rate": 0.00019878721691264704, |
|
"loss": 1.9851, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4363155575143235, |
|
"grad_norm": 0.2825006648123125, |
|
"learning_rate": 0.00019874707885502745, |
|
"loss": 1.7534, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.43851917144116354, |
|
"grad_norm": 0.28034425756379244, |
|
"learning_rate": 0.00019870629155065186, |
|
"loss": 1.7489, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.4407227853680035, |
|
"grad_norm": 0.3181059895374247, |
|
"learning_rate": 0.0001986648552676905, |
|
"loss": 1.8798, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.44292639929484356, |
|
"grad_norm": 0.20367681253555586, |
|
"learning_rate": 0.0001986227702785805, |
|
"loss": 1.8065, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.44513001322168355, |
|
"grad_norm": 0.25873539379499144, |
|
"learning_rate": 0.0001985800368600242, |
|
"loss": 1.687, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4473336271485236, |
|
"grad_norm": 0.2342074611961214, |
|
"learning_rate": 0.0001985366552929871, |
|
"loss": 1.9431, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.4495372410753636, |
|
"grad_norm": 0.48215863294928724, |
|
"learning_rate": 0.00019849262586269642, |
|
"loss": 1.8137, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4517408550022036, |
|
"grad_norm": 0.27724392120608005, |
|
"learning_rate": 0.00019844794885863877, |
|
"loss": 1.8311, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.4539444689290436, |
|
"grad_norm": 0.22655546528476625, |
|
"learning_rate": 0.00019840262457455855, |
|
"loss": 1.6968, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.45614808285588365, |
|
"grad_norm": 0.24804519754733267, |
|
"learning_rate": 0.00019835665330845595, |
|
"loss": 1.844, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.4583516967827237, |
|
"grad_norm": 0.3053870909241666, |
|
"learning_rate": 0.00019831003536258487, |
|
"loss": 1.6674, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.4605553107095637, |
|
"grad_norm": 0.2677054196356507, |
|
"learning_rate": 0.00019826277104345109, |
|
"loss": 1.994, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.4627589246364037, |
|
"grad_norm": 0.2332936772139347, |
|
"learning_rate": 0.0001982148606618102, |
|
"loss": 1.7963, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.4649625385632437, |
|
"grad_norm": 0.290304703836439, |
|
"learning_rate": 0.00019816630453266555, |
|
"loss": 1.8278, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.46716615249008375, |
|
"grad_norm": 0.31440944408786714, |
|
"learning_rate": 0.0001981171029752662, |
|
"loss": 1.7053, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.46936976641692374, |
|
"grad_norm": 0.31837742235142563, |
|
"learning_rate": 0.00019806725631310476, |
|
"loss": 1.8377, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.4715733803437638, |
|
"grad_norm": 0.31835369363351046, |
|
"learning_rate": 0.00019801676487391529, |
|
"loss": 1.7635, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.47377699427060377, |
|
"grad_norm": 0.32008261758346795, |
|
"learning_rate": 0.0001979656289896712, |
|
"loss": 1.8322, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4759806081974438, |
|
"grad_norm": 0.2436306554476478, |
|
"learning_rate": 0.000197913848996583, |
|
"loss": 1.7057, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.47818422212428385, |
|
"grad_norm": 0.24420764132400904, |
|
"learning_rate": 0.00019786142523509615, |
|
"loss": 1.7756, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.48038783605112384, |
|
"grad_norm": 0.24156752527695532, |
|
"learning_rate": 0.00019780835804988876, |
|
"loss": 1.7788, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.4825914499779639, |
|
"grad_norm": 0.3341728235303617, |
|
"learning_rate": 0.00019775464778986934, |
|
"loss": 1.9, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.48479506390480387, |
|
"grad_norm": 0.23920551280340352, |
|
"learning_rate": 0.00019770029480817454, |
|
"loss": 1.8956, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4869986778316439, |
|
"grad_norm": 0.2463312946508512, |
|
"learning_rate": 0.00019764529946216682, |
|
"loss": 1.5345, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.4892022917584839, |
|
"grad_norm": 0.26366246938923554, |
|
"learning_rate": 0.00019758966211343206, |
|
"loss": 1.7621, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.49140590568532394, |
|
"grad_norm": 0.3123943209000793, |
|
"learning_rate": 0.00019753338312777718, |
|
"loss": 1.815, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.4936095196121639, |
|
"grad_norm": 0.2551443475914167, |
|
"learning_rate": 0.00019747646287522784, |
|
"loss": 1.8611, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.49581313353900397, |
|
"grad_norm": 0.2332525414967985, |
|
"learning_rate": 0.0001974189017300259, |
|
"loss": 1.5384, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.498016747465844, |
|
"grad_norm": 0.2861706387721841, |
|
"learning_rate": 0.00019736070007062692, |
|
"loss": 2.029, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.500220361392684, |
|
"grad_norm": 0.4583730418300565, |
|
"learning_rate": 0.00019730185827969784, |
|
"loss": 1.6826, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.502423975319524, |
|
"grad_norm": 0.1931367375627417, |
|
"learning_rate": 0.00019724237674411432, |
|
"loss": 1.6877, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5046275892463641, |
|
"grad_norm": 0.3082508866109686, |
|
"learning_rate": 0.00019718225585495824, |
|
"loss": 1.8148, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.5068312031732041, |
|
"grad_norm": 0.18106453992956664, |
|
"learning_rate": 0.00019712149600751517, |
|
"loss": 1.6556, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5090348171000441, |
|
"grad_norm": 0.23000297672552616, |
|
"learning_rate": 0.00019706009760127164, |
|
"loss": 2.0152, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.511238431026884, |
|
"grad_norm": 0.29387352965288377, |
|
"learning_rate": 0.00019699806103991272, |
|
"loss": 1.7962, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5134420449537241, |
|
"grad_norm": 0.23060861917057546, |
|
"learning_rate": 0.00019693538673131917, |
|
"loss": 1.8123, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.5156456588805641, |
|
"grad_norm": 0.25329490761690543, |
|
"learning_rate": 0.00019687207508756486, |
|
"loss": 1.7052, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5178492728074041, |
|
"grad_norm": 0.3356944723720528, |
|
"learning_rate": 0.00019680812652491408, |
|
"loss": 1.7985, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5200528867342442, |
|
"grad_norm": 0.31492967037029806, |
|
"learning_rate": 0.0001967435414638187, |
|
"loss": 1.7971, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5222565006610842, |
|
"grad_norm": 0.2915314540239125, |
|
"learning_rate": 0.00019667832032891554, |
|
"loss": 1.9571, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.5244601145879242, |
|
"grad_norm": 0.28065572919249093, |
|
"learning_rate": 0.00019661246354902342, |
|
"loss": 1.9185, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5266637285147642, |
|
"grad_norm": 0.22888871376259906, |
|
"learning_rate": 0.00019654597155714044, |
|
"loss": 1.7367, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.5288673424416043, |
|
"grad_norm": 0.23069615225682083, |
|
"learning_rate": 0.00019647884479044123, |
|
"loss": 1.7333, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5310709563684443, |
|
"grad_norm": 0.25845750908289505, |
|
"learning_rate": 0.00019641108369027385, |
|
"loss": 1.5907, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.5332745702952842, |
|
"grad_norm": 0.3069614304651339, |
|
"learning_rate": 0.00019634268870215703, |
|
"loss": 1.9282, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5354781842221242, |
|
"grad_norm": 0.3005722535622308, |
|
"learning_rate": 0.00019627366027577726, |
|
"loss": 1.6378, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.5376817981489643, |
|
"grad_norm": 0.270244780455395, |
|
"learning_rate": 0.00019620399886498578, |
|
"loss": 1.6499, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5398854120758043, |
|
"grad_norm": 0.2796374681033303, |
|
"learning_rate": 0.0001961337049277955, |
|
"loss": 1.7962, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.5420890260026443, |
|
"grad_norm": 0.2608673103569785, |
|
"learning_rate": 0.00019606277892637823, |
|
"loss": 1.6946, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5442926399294844, |
|
"grad_norm": 0.35197474426731085, |
|
"learning_rate": 0.00019599122132706146, |
|
"loss": 1.9751, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.5464962538563244, |
|
"grad_norm": 0.26530267535084306, |
|
"learning_rate": 0.0001959190326003253, |
|
"loss": 1.7257, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5486998677831644, |
|
"grad_norm": 0.2540337035705083, |
|
"learning_rate": 0.00019584621322079942, |
|
"loss": 1.8693, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.5509034817100044, |
|
"grad_norm": 0.2530867344602867, |
|
"learning_rate": 0.00019577276366726003, |
|
"loss": 1.6761, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5531070956368445, |
|
"grad_norm": 0.2549205571103006, |
|
"learning_rate": 0.00019569868442262655, |
|
"loss": 1.8729, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.5553107095636844, |
|
"grad_norm": 0.2871173332255517, |
|
"learning_rate": 0.00019562397597395857, |
|
"loss": 1.8347, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5575143234905244, |
|
"grad_norm": 0.28369117286467893, |
|
"learning_rate": 0.0001955486388124525, |
|
"loss": 1.8547, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.5597179374173645, |
|
"grad_norm": 0.2852722364768899, |
|
"learning_rate": 0.00019547267343343857, |
|
"loss": 1.6552, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5619215513442045, |
|
"grad_norm": 0.34585189639791736, |
|
"learning_rate": 0.0001953960803363774, |
|
"loss": 1.6727, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5641251652710445, |
|
"grad_norm": 0.2767216642372831, |
|
"learning_rate": 0.00019531886002485674, |
|
"loss": 1.8886, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5663287791978845, |
|
"grad_norm": 0.20036600537602942, |
|
"learning_rate": 0.00019524101300658813, |
|
"loss": 1.8685, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.5685323931247246, |
|
"grad_norm": 0.261316473836974, |
|
"learning_rate": 0.0001951625397934037, |
|
"loss": 1.782, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5707360070515646, |
|
"grad_norm": 0.4236076570583356, |
|
"learning_rate": 0.0001950834409012527, |
|
"loss": 1.8318, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.5729396209784046, |
|
"grad_norm": 0.28925306291568, |
|
"learning_rate": 0.00019500371685019806, |
|
"loss": 1.6012, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5751432349052445, |
|
"grad_norm": 0.2945702905493002, |
|
"learning_rate": 0.0001949233681644131, |
|
"loss": 1.9158, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.5773468488320846, |
|
"grad_norm": 0.27644920036772963, |
|
"learning_rate": 0.00019484239537217798, |
|
"loss": 1.8232, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.5795504627589246, |
|
"grad_norm": 0.24910322311175717, |
|
"learning_rate": 0.00019476079900587626, |
|
"loss": 2.0731, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.5817540766857646, |
|
"grad_norm": 0.2746087574116961, |
|
"learning_rate": 0.00019467857960199142, |
|
"loss": 1.8429, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5839576906126047, |
|
"grad_norm": 0.30262920301398166, |
|
"learning_rate": 0.00019459573770110335, |
|
"loss": 1.7647, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5861613045394447, |
|
"grad_norm": 0.2570054670626858, |
|
"learning_rate": 0.0001945122738478847, |
|
"loss": 1.7511, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5883649184662847, |
|
"grad_norm": 0.2506934757685486, |
|
"learning_rate": 0.00019442818859109737, |
|
"loss": 1.9036, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.5905685323931247, |
|
"grad_norm": 0.28984253571835894, |
|
"learning_rate": 0.00019434348248358892, |
|
"loss": 1.7763, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5927721463199648, |
|
"grad_norm": 0.28120650964379307, |
|
"learning_rate": 0.00019425815608228888, |
|
"loss": 1.8062, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.5949757602468048, |
|
"grad_norm": 0.26796794914129696, |
|
"learning_rate": 0.00019417220994820514, |
|
"loss": 1.7886, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5971793741736448, |
|
"grad_norm": 0.27855604023858827, |
|
"learning_rate": 0.00019408564464642024, |
|
"loss": 1.6226, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.5993829881004848, |
|
"grad_norm": 0.23996517928921976, |
|
"learning_rate": 0.00019399846074608757, |
|
"loss": 1.8206, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6015866020273248, |
|
"grad_norm": 0.2718103628870133, |
|
"learning_rate": 0.00019391065882042786, |
|
"loss": 1.622, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.6037902159541648, |
|
"grad_norm": 0.40326599118637163, |
|
"learning_rate": 0.00019382223944672516, |
|
"loss": 1.8595, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6059938298810048, |
|
"grad_norm": 0.26188146699351833, |
|
"learning_rate": 0.00019373320320632313, |
|
"loss": 1.719, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.6081974438078449, |
|
"grad_norm": 0.26353376595142103, |
|
"learning_rate": 0.00019364355068462126, |
|
"loss": 1.7599, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6104010577346849, |
|
"grad_norm": 0.27506310894040453, |
|
"learning_rate": 0.00019355328247107106, |
|
"loss": 1.6895, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.6126046716615249, |
|
"grad_norm": 0.23293714707305346, |
|
"learning_rate": 0.00019346239915917204, |
|
"loss": 1.9199, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6148082855883649, |
|
"grad_norm": 0.2728024319863431, |
|
"learning_rate": 0.00019337090134646787, |
|
"loss": 1.6137, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.617011899515205, |
|
"grad_norm": 0.2580080135345881, |
|
"learning_rate": 0.00019327878963454253, |
|
"loss": 1.9251, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.619215513442045, |
|
"grad_norm": 0.18292171734629373, |
|
"learning_rate": 0.00019318606462901625, |
|
"loss": 1.6127, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.6214191273688849, |
|
"grad_norm": 0.2540991707755816, |
|
"learning_rate": 0.0001930927269395416, |
|
"loss": 1.716, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.623622741295725, |
|
"grad_norm": 0.2222252074542422, |
|
"learning_rate": 0.00019299877717979944, |
|
"loss": 1.649, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.625826355222565, |
|
"grad_norm": 0.23656133580313402, |
|
"learning_rate": 0.00019290421596749487, |
|
"loss": 1.7321, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.628029969149405, |
|
"grad_norm": 0.2770878731693278, |
|
"learning_rate": 0.00019280904392435328, |
|
"loss": 1.7982, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.630233583076245, |
|
"grad_norm": 0.38713553349288304, |
|
"learning_rate": 0.00019271326167611606, |
|
"loss": 1.757, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6324371970030851, |
|
"grad_norm": 0.21826448168974197, |
|
"learning_rate": 0.00019261686985253668, |
|
"loss": 1.6568, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.6346408109299251, |
|
"grad_norm": 0.28392855935252787, |
|
"learning_rate": 0.00019251986908737646, |
|
"loss": 1.6995, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.6368444248567651, |
|
"grad_norm": 0.3335345636849735, |
|
"learning_rate": 0.00019242226001840043, |
|
"loss": 1.6445, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.6390480387836052, |
|
"grad_norm": 0.23367317521663916, |
|
"learning_rate": 0.0001923240432873731, |
|
"loss": 1.7995, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6412516527104452, |
|
"grad_norm": 0.26851942754392183, |
|
"learning_rate": 0.00019222521954005424, |
|
"loss": 1.8078, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.6434552666372851, |
|
"grad_norm": 0.28393938915488615, |
|
"learning_rate": 0.00019212578942619474, |
|
"loss": 1.7108, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6456588805641251, |
|
"grad_norm": 0.2481400915816766, |
|
"learning_rate": 0.00019202575359953213, |
|
"loss": 1.7509, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.6478624944909652, |
|
"grad_norm": 0.22200215432596082, |
|
"learning_rate": 0.00019192511271778656, |
|
"loss": 1.6549, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6500661084178052, |
|
"grad_norm": 0.24388551441860462, |
|
"learning_rate": 0.00019182386744265623, |
|
"loss": 1.9977, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.6522697223446452, |
|
"grad_norm": 0.28231311563927136, |
|
"learning_rate": 0.00019172201843981314, |
|
"loss": 1.7473, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6544733362714852, |
|
"grad_norm": 0.2911468005732327, |
|
"learning_rate": 0.00019161956637889872, |
|
"loss": 1.8572, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.6566769501983253, |
|
"grad_norm": 0.2809891282005259, |
|
"learning_rate": 0.0001915165119335194, |
|
"loss": 1.6363, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6588805641251653, |
|
"grad_norm": 0.257092783900919, |
|
"learning_rate": 0.0001914128557812422, |
|
"loss": 1.6894, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.6610841780520053, |
|
"grad_norm": 0.25492249438744313, |
|
"learning_rate": 0.00019130859860359026, |
|
"loss": 1.9549, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6632877919788454, |
|
"grad_norm": 0.29656417671091373, |
|
"learning_rate": 0.00019120374108603843, |
|
"loss": 1.882, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.6654914059056853, |
|
"grad_norm": 0.26795937749586346, |
|
"learning_rate": 0.0001910982839180086, |
|
"loss": 1.7532, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6676950198325253, |
|
"grad_norm": 0.30019068058678056, |
|
"learning_rate": 0.0001909922277928654, |
|
"loss": 1.9185, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.6698986337593653, |
|
"grad_norm": 0.2908491561313871, |
|
"learning_rate": 0.00019088557340791136, |
|
"loss": 1.8659, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6721022476862054, |
|
"grad_norm": 0.2664246491795904, |
|
"learning_rate": 0.00019077832146438257, |
|
"loss": 1.618, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6743058616130454, |
|
"grad_norm": 0.2670150987011056, |
|
"learning_rate": 0.00019067047266744396, |
|
"loss": 1.82, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6765094755398854, |
|
"grad_norm": 0.26014739460054054, |
|
"learning_rate": 0.0001905620277261847, |
|
"loss": 1.8267, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.6787130894667255, |
|
"grad_norm": 0.39761269473071104, |
|
"learning_rate": 0.00019045298735361345, |
|
"loss": 1.7682, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6809167033935655, |
|
"grad_norm": 0.25574765659361764, |
|
"learning_rate": 0.0001903433522666538, |
|
"loss": 1.8709, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.6831203173204055, |
|
"grad_norm": 0.2633667660696579, |
|
"learning_rate": 0.00019023312318613945, |
|
"loss": 1.7439, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6853239312472454, |
|
"grad_norm": 0.33365458542367915, |
|
"learning_rate": 0.00019012230083680954, |
|
"loss": 1.8991, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.6875275451740855, |
|
"grad_norm": 0.26233949374382926, |
|
"learning_rate": 0.0001900108859473039, |
|
"loss": 1.7681, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6897311591009255, |
|
"grad_norm": 0.27517789876709425, |
|
"learning_rate": 0.00018989887925015814, |
|
"loss": 1.8164, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.6919347730277655, |
|
"grad_norm": 0.22352922332415057, |
|
"learning_rate": 0.00018978628148179897, |
|
"loss": 1.6674, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.6941383869546055, |
|
"grad_norm": 0.2705784713837994, |
|
"learning_rate": 0.0001896730933825393, |
|
"loss": 1.883, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.6963420008814456, |
|
"grad_norm": 0.2333580239686404, |
|
"learning_rate": 0.00018955931569657333, |
|
"loss": 1.6889, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.6985456148082856, |
|
"grad_norm": 0.29573837903023903, |
|
"learning_rate": 0.00018944494917197172, |
|
"loss": 1.8473, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.7007492287351256, |
|
"grad_norm": 0.24394341802838163, |
|
"learning_rate": 0.00018932999456067675, |
|
"loss": 1.844, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7029528426619657, |
|
"grad_norm": 0.2454925329534158, |
|
"learning_rate": 0.0001892144526184971, |
|
"loss": 1.7824, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.7051564565888057, |
|
"grad_norm": 0.23532373440650545, |
|
"learning_rate": 0.00018909832410510315, |
|
"loss": 1.8537, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7073600705156456, |
|
"grad_norm": 0.2602720785746293, |
|
"learning_rate": 0.00018898160978402198, |
|
"loss": 1.8717, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.7095636844424856, |
|
"grad_norm": 0.23292617880883168, |
|
"learning_rate": 0.00018886431042263208, |
|
"loss": 1.703, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.7117672983693257, |
|
"grad_norm": 0.32473590479932374, |
|
"learning_rate": 0.0001887464267921587, |
|
"loss": 1.7075, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.7139709122961657, |
|
"grad_norm": 0.21131060430922607, |
|
"learning_rate": 0.00018862795966766833, |
|
"loss": 1.6993, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7161745262230057, |
|
"grad_norm": 0.2945260951578129, |
|
"learning_rate": 0.0001885089098280641, |
|
"loss": 1.7315, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.7183781401498458, |
|
"grad_norm": 0.2988718316963067, |
|
"learning_rate": 0.0001883892780560802, |
|
"loss": 1.7079, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7205817540766858, |
|
"grad_norm": 0.2671861990469183, |
|
"learning_rate": 0.00018826906513827704, |
|
"loss": 1.8816, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.7227853680035258, |
|
"grad_norm": 0.3539590759381266, |
|
"learning_rate": 0.00018814827186503595, |
|
"loss": 1.7559, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7249889819303658, |
|
"grad_norm": 0.2098483762554333, |
|
"learning_rate": 0.00018802689903055396, |
|
"loss": 1.8296, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.7271925958572059, |
|
"grad_norm": 0.24342758941079398, |
|
"learning_rate": 0.0001879049474328387, |
|
"loss": 1.8845, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7293962097840458, |
|
"grad_norm": 0.30213591141833546, |
|
"learning_rate": 0.00018778241787370303, |
|
"loss": 1.7739, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.7315998237108858, |
|
"grad_norm": 0.2796229215884198, |
|
"learning_rate": 0.00018765931115875985, |
|
"loss": 1.7238, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7338034376377258, |
|
"grad_norm": 0.2818539631618944, |
|
"learning_rate": 0.00018753562809741673, |
|
"loss": 1.7833, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.7360070515645659, |
|
"grad_norm": 0.2534113717910187, |
|
"learning_rate": 0.00018741136950287067, |
|
"loss": 1.781, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.7382106654914059, |
|
"grad_norm": 0.25817838823410116, |
|
"learning_rate": 0.0001872865361921027, |
|
"loss": 1.6845, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.7404142794182459, |
|
"grad_norm": 0.25172352057271447, |
|
"learning_rate": 0.00018716112898587247, |
|
"loss": 1.9169, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.742617893345086, |
|
"grad_norm": 0.257829203989891, |
|
"learning_rate": 0.000187035148708713, |
|
"loss": 1.7977, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.744821507271926, |
|
"grad_norm": 0.21408758544817338, |
|
"learning_rate": 0.00018690859618892506, |
|
"loss": 1.6934, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.747025121198766, |
|
"grad_norm": 0.32305529721640136, |
|
"learning_rate": 0.0001867814722585719, |
|
"loss": 1.811, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.749228735125606, |
|
"grad_norm": 0.27366670016595385, |
|
"learning_rate": 0.0001866537777534737, |
|
"loss": 1.6083, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.751432349052446, |
|
"grad_norm": 0.2978907514821807, |
|
"learning_rate": 0.00018652551351320198, |
|
"loss": 1.7621, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.753635962979286, |
|
"grad_norm": 0.2682049213061407, |
|
"learning_rate": 0.00018639668038107437, |
|
"loss": 1.8008, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.755839576906126, |
|
"grad_norm": 0.2422117187271709, |
|
"learning_rate": 0.0001862672792041487, |
|
"loss": 1.9899, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.7580431908329661, |
|
"grad_norm": 0.27006478070278994, |
|
"learning_rate": 0.0001861373108332177, |
|
"loss": 1.7577, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7602468047598061, |
|
"grad_norm": 0.31176719361390687, |
|
"learning_rate": 0.0001860067761228033, |
|
"loss": 1.7494, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7624504186866461, |
|
"grad_norm": 0.2965711368621697, |
|
"learning_rate": 0.00018587567593115098, |
|
"loss": 1.9554, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7646540326134861, |
|
"grad_norm": 0.26922434332877426, |
|
"learning_rate": 0.0001857440111202242, |
|
"loss": 1.7415, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.7668576465403262, |
|
"grad_norm": 0.25558172886185604, |
|
"learning_rate": 0.00018561178255569879, |
|
"loss": 1.7389, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7690612604671662, |
|
"grad_norm": 0.24853091753480136, |
|
"learning_rate": 0.000185478991106957, |
|
"loss": 1.9658, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.7712648743940062, |
|
"grad_norm": 0.29324751667885357, |
|
"learning_rate": 0.00018534563764708206, |
|
"loss": 1.8161, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7734684883208461, |
|
"grad_norm": 0.2517432199347208, |
|
"learning_rate": 0.00018521172305285236, |
|
"loss": 1.6512, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.7756721022476862, |
|
"grad_norm": 0.25142671678387746, |
|
"learning_rate": 0.00018507724820473556, |
|
"loss": 1.7221, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7778757161745262, |
|
"grad_norm": 0.2851848328431096, |
|
"learning_rate": 0.00018494221398688307, |
|
"loss": 1.9137, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.7800793301013662, |
|
"grad_norm": 0.27922286690055653, |
|
"learning_rate": 0.00018480662128712389, |
|
"loss": 1.7529, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7822829440282063, |
|
"grad_norm": 0.24188014554995038, |
|
"learning_rate": 0.00018467047099695905, |
|
"loss": 1.7036, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7844865579550463, |
|
"grad_norm": 0.21832286860860883, |
|
"learning_rate": 0.00018453376401155562, |
|
"loss": 1.8127, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7866901718818863, |
|
"grad_norm": 0.26225286533808284, |
|
"learning_rate": 0.00018439650122974087, |
|
"loss": 1.7398, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.7888937858087263, |
|
"grad_norm": 0.2585197080539731, |
|
"learning_rate": 0.0001842586835539964, |
|
"loss": 1.8645, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.7910973997355664, |
|
"grad_norm": 0.24265804391287996, |
|
"learning_rate": 0.00018412031189045196, |
|
"loss": 1.7356, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.7933010136624064, |
|
"grad_norm": 0.2590383769624523, |
|
"learning_rate": 0.00018398138714887993, |
|
"loss": 1.6518, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7955046275892463, |
|
"grad_norm": 0.23488722647726504, |
|
"learning_rate": 0.00018384191024268894, |
|
"loss": 1.8054, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.7977082415160864, |
|
"grad_norm": 0.23606528165986015, |
|
"learning_rate": 0.00018370188208891803, |
|
"loss": 1.6994, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7999118554429264, |
|
"grad_norm": 0.29081075890767155, |
|
"learning_rate": 0.00018356130360823068, |
|
"loss": 1.987, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.8021154693697664, |
|
"grad_norm": 0.24366570601075402, |
|
"learning_rate": 0.00018342017572490858, |
|
"loss": 1.5363, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8043190832966064, |
|
"grad_norm": 0.2677025615653137, |
|
"learning_rate": 0.0001832784993668458, |
|
"loss": 1.7781, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.8065226972234465, |
|
"grad_norm": 0.22776397402134876, |
|
"learning_rate": 0.0001831362754655424, |
|
"loss": 1.8064, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.8087263111502865, |
|
"grad_norm": 0.21327160658864644, |
|
"learning_rate": 0.0001829935049560985, |
|
"loss": 1.5278, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.8109299250771265, |
|
"grad_norm": 0.4311681362534179, |
|
"learning_rate": 0.0001828501887772081, |
|
"loss": 1.9316, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8131335390039665, |
|
"grad_norm": 0.3127936675415962, |
|
"learning_rate": 0.00018270632787115295, |
|
"loss": 1.9393, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8153371529308066, |
|
"grad_norm": 0.2872735828645766, |
|
"learning_rate": 0.0001825619231837962, |
|
"loss": 1.8913, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8175407668576465, |
|
"grad_norm": 0.23659304750850696, |
|
"learning_rate": 0.0001824169756645763, |
|
"loss": 1.79, |
|
"step": 1855 |
|
}, |
|
{
"epoch": 0.8197443807844865,
"grad_norm": 0.22502963250778782,
"learning_rate": 0.00018227148626650072,
"loss": 1.7616,
"step": 1860
},
{
"epoch": 0.8219479947113266,
"grad_norm": 0.22975193518596113,
"learning_rate": 0.00018212545594613978,
"loss": 1.7862,
"step": 1865
},
{
"epoch": 0.8241516086381666,
"grad_norm": 0.22813094138391649,
"learning_rate": 0.00018197888566362023,
"loss": 1.6909,
"step": 1870
},
{
"epoch": 0.8263552225650066,
"grad_norm": 0.3170639776475332,
"learning_rate": 0.00018183177638261895,
"loss": 1.8876,
"step": 1875
},
{
"epoch": 0.8285588364918466,
"grad_norm": 0.25433638493270494,
"learning_rate": 0.00018168412907035672,
"loss": 1.7447,
"step": 1880
},
{
"epoch": 0.8307624504186867,
"grad_norm": 0.2527115922098035,
"learning_rate": 0.00018153594469759175,
"loss": 1.7288,
"step": 1885
},
{
"epoch": 0.8329660643455267,
"grad_norm": 0.24448176152726306,
"learning_rate": 0.00018138722423861333,
"loss": 1.8385,
"step": 1890
},
{
"epoch": 0.8351696782723667,
"grad_norm": 0.2797043743850419,
"learning_rate": 0.00018123796867123548,
"loss": 1.8353,
"step": 1895
},
{
"epoch": 0.8373732921992068,
"grad_norm": 0.26948384291872024,
"learning_rate": 0.00018108817897679043,
"loss": 1.6995,
"step": 1900
},
{
"epoch": 0.8395769061260467,
"grad_norm": 0.23050209288787957,
"learning_rate": 0.00018093785614012228,
"loss": 1.7752,
"step": 1905
},
{
"epoch": 0.8417805200528867,
"grad_norm": 0.28186670258249874,
"learning_rate": 0.0001807870011495803,
"loss": 1.8608,
"step": 1910
},
{
"epoch": 0.8439841339797267,
"grad_norm": 0.29105838653881044,
"learning_rate": 0.00018063561499701282,
"loss": 1.8997,
"step": 1915
},
{
"epoch": 0.8461877479065668,
"grad_norm": 0.29426186229081736,
"learning_rate": 0.00018048369867776029,
"loss": 1.6416,
"step": 1920
},
{
"epoch": 0.8483913618334068,
"grad_norm": 0.21168547827802603,
"learning_rate": 0.00018033125319064902,
"loss": 1.8158,
"step": 1925
},
{
"epoch": 0.8505949757602468,
"grad_norm": 0.35188920795977724,
"learning_rate": 0.00018017827953798444,
"loss": 1.7531,
"step": 1930
},
{
"epoch": 0.8527985896870868,
"grad_norm": 0.2979347408479712,
"learning_rate": 0.0001800247787255447,
"loss": 1.9657,
"step": 1935
},
{
"epoch": 0.8550022036139269,
"grad_norm": 0.3102746795792285,
"learning_rate": 0.00017987075176257382,
"loss": 1.5273,
"step": 1940
},
{
"epoch": 0.8572058175407669,
"grad_norm": 0.2141203314519693,
"learning_rate": 0.00017971619966177524,
"loss": 1.7978,
"step": 1945
},
{
"epoch": 0.8594094314676068,
"grad_norm": 0.3010217472666534,
"learning_rate": 0.00017956112343930512,
"loss": 1.8066,
"step": 1950
},
{
"epoch": 0.8616130453944469,
"grad_norm": 0.2685511500450377,
"learning_rate": 0.00017940552411476566,
"loss": 1.8096,
"step": 1955
},
{
"epoch": 0.8638166593212869,
"grad_norm": 0.3648269512255727,
"learning_rate": 0.00017924940271119827,
"loss": 1.9212,
"step": 1960
},
{
"epoch": 0.8660202732481269,
"grad_norm": 0.23673592232409962,
"learning_rate": 0.00017909276025507696,
"loss": 1.9925,
"step": 1965
},
{
"epoch": 0.8682238871749669,
"grad_norm": 0.22051543871837528,
"learning_rate": 0.00017893559777630173,
"loss": 1.8895,
"step": 1970
},
{
"epoch": 0.870427501101807,
"grad_norm": 0.25128906373836973,
"learning_rate": 0.00017877791630819149,
"loss": 1.7637,
"step": 1975
},
{
"epoch": 0.872631115028647,
"grad_norm": 0.2982588862018805,
"learning_rate": 0.00017861971688747747,
"loss": 1.865,
"step": 1980
},
{
"epoch": 0.874834728955487,
"grad_norm": 0.2927080940082808,
"learning_rate": 0.00017846100055429642,
"loss": 1.742,
"step": 1985
},
{
"epoch": 0.8770383428823271,
"grad_norm": 0.30412581475879524,
"learning_rate": 0.00017830176835218368,
"loss": 1.6706,
"step": 1990
},
{
"epoch": 0.8792419568091671,
"grad_norm": 0.2902194881075315,
"learning_rate": 0.0001781420213280662,
"loss": 1.8014,
"step": 1995
},
{
"epoch": 0.881445570736007,
"grad_norm": 0.3378104042442177,
"learning_rate": 0.00017798176053225606,
"loss": 1.8318,
"step": 2000
},
{
"epoch": 0.883649184662847,
"grad_norm": 0.2465500138547731,
"learning_rate": 0.0001778209870184431,
"loss": 1.6756,
"step": 2005
},
{
"epoch": 0.8858527985896871,
"grad_norm": 0.27205493094420413,
"learning_rate": 0.00017765970184368835,
"loss": 1.7398,
"step": 2010
},
{
"epoch": 0.8880564125165271,
"grad_norm": 0.28881718844442433,
"learning_rate": 0.0001774979060684168,
"loss": 1.8652,
"step": 2015
},
{
"epoch": 0.8902600264433671,
"grad_norm": 0.2930186193777016,
"learning_rate": 0.0001773356007564107,
"loss": 1.7748,
"step": 2020
},
{
"epoch": 0.8924636403702071,
"grad_norm": 0.28319545050901546,
"learning_rate": 0.0001771727869748023,
"loss": 1.7198,
"step": 2025
},
{
"epoch": 0.8946672542970472,
"grad_norm": 0.26485391899745814,
"learning_rate": 0.000177009465794067,
"loss": 1.7109,
"step": 2030
},
{
"epoch": 0.8968708682238872,
"grad_norm": 0.2367084302014588,
"learning_rate": 0.0001768456382880163,
"loss": 1.773,
"step": 2035
},
{
"epoch": 0.8990744821507272,
"grad_norm": 0.28707015291179266,
"learning_rate": 0.00017668130553379063,
"loss": 1.8698,
"step": 2040
},
{
"epoch": 0.9012780960775673,
"grad_norm": 0.26521748140973644,
"learning_rate": 0.00017651646861185252,
"loss": 1.5433,
"step": 2045
},
{
"epoch": 0.9034817100044072,
"grad_norm": 0.25186929013428017,
"learning_rate": 0.0001763511286059791,
"loss": 1.7003,
"step": 2050
},
{
"epoch": 0.9056853239312472,
"grad_norm": 0.2646341657457682,
"learning_rate": 0.0001761852866032554,
"loss": 1.8017,
"step": 2055
},
{
"epoch": 0.9078889378580872,
"grad_norm": 0.27426709503702656,
"learning_rate": 0.0001760189436940669,
"loss": 1.717,
"step": 2060
},
{
"epoch": 0.9100925517849273,
"grad_norm": 0.2909513812455515,
"learning_rate": 0.00017585210097209242,
"loss": 1.8286,
"step": 2065
},
{
"epoch": 0.9122961657117673,
"grad_norm": 0.3195261689470233,
"learning_rate": 0.00017568475953429706,
"loss": 1.9248,
"step": 2070
},
{
"epoch": 0.9144997796386073,
"grad_norm": 0.2559454772629871,
"learning_rate": 0.00017551692048092487,
"loss": 1.9235,
"step": 2075
},
{
"epoch": 0.9167033935654474,
"grad_norm": 0.2787085951940099,
"learning_rate": 0.00017534858491549167,
"loss": 1.5563,
"step": 2080
},
{
"epoch": 0.9189070074922874,
"grad_norm": 0.27430346968172775,
"learning_rate": 0.00017517975394477765,
"loss": 1.7408,
"step": 2085
},
{
"epoch": 0.9211106214191274,
"grad_norm": 0.3057530869061833,
"learning_rate": 0.00017501042867882043,
"loss": 1.8029,
"step": 2090
},
{
"epoch": 0.9233142353459673,
"grad_norm": 0.28792933063158205,
"learning_rate": 0.0001748406102309073,
"loss": 1.7174,
"step": 2095
},
{
"epoch": 0.9255178492728074,
"grad_norm": 0.2926998767200136,
"learning_rate": 0.00017467029971756837,
"loss": 1.7753,
"step": 2100
},
{
"epoch": 0.9277214631996474,
"grad_norm": 0.3202640628143745,
"learning_rate": 0.00017449949825856881,
"loss": 1.6815,
"step": 2105
},
{
"epoch": 0.9299250771264874,
"grad_norm": 0.2898987783743886,
"learning_rate": 0.00017432820697690183,
"loss": 1.5471,
"step": 2110
},
{
"epoch": 0.9321286910533274,
"grad_norm": 0.25429358796976326,
"learning_rate": 0.00017415642699878108,
"loss": 1.801,
"step": 2115
},
{
"epoch": 0.9343323049801675,
"grad_norm": 0.2906871474145296,
"learning_rate": 0.00017398415945363326,
"loss": 1.7255,
"step": 2120
},
{
"epoch": 0.9365359189070075,
"grad_norm": 0.24941394640440692,
"learning_rate": 0.00017381140547409091,
"loss": 1.7265,
"step": 2125
},
{
"epoch": 0.9387395328338475,
"grad_norm": 0.2680278555985492,
"learning_rate": 0.00017363816619598462,
"loss": 1.8507,
"step": 2130
},
{
"epoch": 0.9409431467606876,
"grad_norm": 0.24326339969260075,
"learning_rate": 0.00017346444275833587,
"loss": 1.8278,
"step": 2135
},
{
"epoch": 0.9431467606875276,
"grad_norm": 0.2881192799230667,
"learning_rate": 0.00017329023630334935,
"loss": 1.6301,
"step": 2140
},
{
"epoch": 0.9453503746143676,
"grad_norm": 0.22324975257691354,
"learning_rate": 0.00017311554797640552,
"loss": 1.8182,
"step": 2145
},
{
"epoch": 0.9475539885412075,
"grad_norm": 0.38310870823903137,
"learning_rate": 0.0001729403789260531,
"loss": 1.6758,
"step": 2150
},
{
"epoch": 0.9497576024680476,
"grad_norm": 0.3082503271893105,
"learning_rate": 0.0001727647303040015,
"loss": 1.717,
"step": 2155
},
{
"epoch": 0.9519612163948876,
"grad_norm": 0.2910152870088472,
"learning_rate": 0.00017258860326511318,
"loss": 1.6762,
"step": 2160
},
{
"epoch": 0.9541648303217276,
"grad_norm": 0.2751619266213339,
"learning_rate": 0.00017241199896739614,
"loss": 1.5402,
"step": 2165
},
{
"epoch": 0.9563684442485677,
"grad_norm": 0.2633670365064433,
"learning_rate": 0.00017223491857199636,
"loss": 1.6089,
"step": 2170
},
{
"epoch": 0.9585720581754077,
"grad_norm": 0.26253468282607706,
"learning_rate": 0.00017205736324318999,
"loss": 1.8698,
"step": 2175
},
{
"epoch": 0.9607756721022477,
"grad_norm": 0.2623810146620032,
"learning_rate": 0.0001718793341483758,
"loss": 1.7996,
"step": 2180
},
{
"epoch": 0.9629792860290877,
"grad_norm": 0.2541467303967672,
"learning_rate": 0.00017170083245806757,
"loss": 1.7066,
"step": 2185
},
{
"epoch": 0.9651828999559278,
"grad_norm": 0.2450837271489877,
"learning_rate": 0.00017152185934588623,
"loss": 1.9326,
"step": 2190
},
{
"epoch": 0.9673865138827678,
"grad_norm": 0.3277904173770177,
"learning_rate": 0.00017134241598855236,
"loss": 1.941,
"step": 2195
},
{
"epoch": 0.9695901278096077,
"grad_norm": 0.26036896345598354,
"learning_rate": 0.0001711625035658782,
"loss": 1.7543,
"step": 2200
},
{
"epoch": 0.9717937417364477,
"grad_norm": 0.294563677729738,
"learning_rate": 0.00017098212326076008,
"loss": 1.6402,
"step": 2205
},
{
"epoch": 0.9739973556632878,
"grad_norm": 0.3220804392466043,
"learning_rate": 0.0001708012762591706,
"loss": 1.7078,
"step": 2210
},
{
"epoch": 0.9762009695901278,
"grad_norm": 0.2811686235748074,
"learning_rate": 0.00017061996375015078,
"loss": 1.9067,
"step": 2215
},
{
"epoch": 0.9784045835169678,
"grad_norm": 0.23864027527398188,
"learning_rate": 0.00017043818692580228,
"loss": 1.9146,
"step": 2220
},
{
"epoch": 0.9806081974438079,
"grad_norm": 0.2954583734368435,
"learning_rate": 0.00017025594698127965,
"loss": 1.791,
"step": 2225
},
{
"epoch": 0.9828118113706479,
"grad_norm": 0.2510099463630807,
"learning_rate": 0.00017007324511478223,
"loss": 1.5883,
"step": 2230
},
{
"epoch": 0.9850154252974879,
"grad_norm": 0.27874972856939056,
"learning_rate": 0.00016989008252754655,
"loss": 1.7865,
"step": 2235
},
{
"epoch": 0.9872190392243279,
"grad_norm": 0.2611091563705049,
"learning_rate": 0.00016970646042383826,
"loss": 1.8104,
"step": 2240
},
{
"epoch": 0.989422653151168,
"grad_norm": 0.28511909006315234,
"learning_rate": 0.00016952238001094428,
"loss": 1.6686,
"step": 2245
},
{
"epoch": 0.9916262670780079,
"grad_norm": 0.3381628494565006,
"learning_rate": 0.00016933784249916476,
"loss": 1.9412,
"step": 2250
},
{
"epoch": 0.9938298810048479,
"grad_norm": 0.28520119611042416,
"learning_rate": 0.00016915284910180533,
"loss": 1.6889,
"step": 2255
},
{
"epoch": 0.996033494931688,
"grad_norm": 0.2883852687543481,
"learning_rate": 0.00016896740103516895,
"loss": 1.8003,
"step": 2260
},
{
"epoch": 0.998237108858528,
"grad_norm": 0.24451797378236065,
"learning_rate": 0.0001687814995185479,
"loss": 1.8752,
"step": 2265
},
{
"epoch": 1.000440722785368,
"grad_norm": 0.33508649606500057,
"learning_rate": 0.00016859514577421592,
"loss": 1.8299,
"step": 2270
},
{
"epoch": 1.002644336712208,
"grad_norm": 0.2636718535549577,
"learning_rate": 0.00016840834102741997,
"loss": 1.706,
"step": 2275
},
{
"epoch": 1.004847950639048,
"grad_norm": 0.27046399540212357,
"learning_rate": 0.00016822108650637238,
"loss": 1.6819,
"step": 2280
},
{
"epoch": 1.007051564565888,
"grad_norm": 0.30877609431387876,
"learning_rate": 0.00016803338344224266,
"loss": 1.7218,
"step": 2285
},
{
"epoch": 1.0092551784927282,
"grad_norm": 0.23492423335336904,
"learning_rate": 0.00016784523306914934,
"loss": 1.57,
"step": 2290
},
{
"epoch": 1.0114587924195682,
"grad_norm": 0.2795118641324036,
"learning_rate": 0.00016765663662415204,
"loss": 1.7023,
"step": 2295
},
{
"epoch": 1.0136624063464081,
"grad_norm": 0.34933397332267874,
"learning_rate": 0.00016746759534724316,
"loss": 1.9401,
"step": 2300
},
{
"epoch": 1.0158660202732481,
"grad_norm": 0.2928363020374088,
"learning_rate": 0.00016727811048133985,
"loss": 1.7873,
"step": 2305
},
{
"epoch": 1.0180696342000881,
"grad_norm": 0.2765841121495588,
"learning_rate": 0.00016708818327227574,
"loss": 1.6925,
"step": 2310
},
{
"epoch": 1.020273248126928,
"grad_norm": 0.3407816261608907,
"learning_rate": 0.00016689781496879283,
"loss": 1.7583,
"step": 2315
},
{
"epoch": 1.022476862053768,
"grad_norm": 0.3837729171353601,
"learning_rate": 0.00016670700682253328,
"loss": 1.7058,
"step": 2320
},
{
"epoch": 1.0246804759806083,
"grad_norm": 0.2833490201778371,
"learning_rate": 0.00016651576008803112,
"loss": 1.7306,
"step": 2325
},
{
"epoch": 1.0268840899074483,
"grad_norm": 0.24052446562056756,
"learning_rate": 0.00016632407602270398,
"loss": 1.6612,
"step": 2330
},
{
"epoch": 1.0290877038342883,
"grad_norm": 0.2706767115179465,
"learning_rate": 0.00016613195588684488,
"loss": 1.5943,
"step": 2335
},
{
"epoch": 1.0312913177611283,
"grad_norm": 0.2694153548940927,
"learning_rate": 0.00016593940094361407,
"loss": 1.7072,
"step": 2340
},
{
"epoch": 1.0334949316879682,
"grad_norm": 0.2675415111083724,
"learning_rate": 0.0001657464124590304,
"loss": 1.7392,
"step": 2345
},
{
"epoch": 1.0356985456148082,
"grad_norm": 0.2783138545253644,
"learning_rate": 0.00016555299170196332,
"loss": 1.7264,
"step": 2350
},
{
"epoch": 1.0379021595416482,
"grad_norm": 0.29818023025744944,
"learning_rate": 0.00016535913994412436,
"loss": 1.6038,
"step": 2355
},
{
"epoch": 1.0401057734684884,
"grad_norm": 0.30521070526088534,
"learning_rate": 0.00016516485846005882,
"loss": 1.652,
"step": 2360
},
{
"epoch": 1.0423093873953284,
"grad_norm": 0.24990284589728196,
"learning_rate": 0.00016497014852713738,
"loss": 1.5745,
"step": 2365
},
{
"epoch": 1.0445130013221684,
"grad_norm": 0.36544530123344093,
"learning_rate": 0.0001647750114255477,
"loss": 1.8678,
"step": 2370
},
{
"epoch": 1.0467166152490084,
"grad_norm": 0.2984936690547218,
"learning_rate": 0.000164579448438286,
"loss": 1.8263,
"step": 2375
},
{
"epoch": 1.0489202291758484,
"grad_norm": 0.3138432337464576,
"learning_rate": 0.00016438346085114865,
"loss": 1.7807,
"step": 2380
},
{
"epoch": 1.0511238431026884,
"grad_norm": 0.409332779624452,
"learning_rate": 0.00016418704995272373,
"loss": 1.8612,
"step": 2385
},
{
"epoch": 1.0533274570295283,
"grad_norm": 0.27134598526523496,
"learning_rate": 0.00016399021703438247,
"loss": 1.7323,
"step": 2390
},
{
"epoch": 1.0555310709563686,
"grad_norm": 0.2852569861948395,
"learning_rate": 0.0001637929633902708,
"loss": 1.7619,
"step": 2395
},
{
"epoch": 1.0577346848832085,
"grad_norm": 0.2913405836362002,
"learning_rate": 0.00016359529031730093,
"loss": 1.8196,
"step": 2400
},
{
"epoch": 1.0599382988100485,
"grad_norm": 0.3029325708755559,
"learning_rate": 0.00016339719911514272,
"loss": 1.7579,
"step": 2405
},
{
"epoch": 1.0621419127368885,
"grad_norm": 0.2738995593865038,
"learning_rate": 0.00016319869108621512,
"loss": 1.8309,
"step": 2410
},
{
"epoch": 1.0643455266637285,
"grad_norm": 0.26503420605103173,
"learning_rate": 0.00016299976753567772,
"loss": 1.708,
"step": 2415
},
{
"epoch": 1.0665491405905685,
"grad_norm": 0.32283303116470646,
"learning_rate": 0.00016280042977142204,
"loss": 1.6915,
"step": 2420
},
{
"epoch": 1.0687527545174085,
"grad_norm": 0.24107839513422735,
"learning_rate": 0.00016260067910406304,
"loss": 1.5685,
"step": 2425
},
{
"epoch": 1.0709563684442487,
"grad_norm": 0.29608435429464214,
"learning_rate": 0.00016240051684693042,
"loss": 1.7239,
"step": 2430
},
{
"epoch": 1.0731599823710887,
"grad_norm": 0.29986988301001716,
"learning_rate": 0.00016219994431606005,
"loss": 1.6816,
"step": 2435
},
{
"epoch": 1.0753635962979287,
"grad_norm": 0.24843757070636935,
"learning_rate": 0.00016199896283018527,
"loss": 1.5677,
"step": 2440
},
{
"epoch": 1.0775672102247686,
"grad_norm": 0.2983161128962509,
"learning_rate": 0.00016179757371072824,
"loss": 1.7859,
"step": 2445
},
{
"epoch": 1.0797708241516086,
"grad_norm": 0.2787284156730912,
"learning_rate": 0.00016159577828179123,
"loss": 1.562,
"step": 2450
},
{
"epoch": 1.0819744380784486,
"grad_norm": 0.3269743373279123,
"learning_rate": 0.0001613935778701479,
"loss": 1.8759,
"step": 2455
},
{
"epoch": 1.0841780520052886,
"grad_norm": 0.2802831939826356,
"learning_rate": 0.0001611909738052347,
"loss": 1.7401,
"step": 2460
},
{
"epoch": 1.0863816659321286,
"grad_norm": 0.23846388238067243,
"learning_rate": 0.000160987967419142,
"loss": 1.727,
"step": 2465
},
{
"epoch": 1.0885852798589688,
"grad_norm": 0.2659078946356941,
"learning_rate": 0.00016078456004660536,
"loss": 1.6454,
"step": 2470
},
{
"epoch": 1.0907888937858088,
"grad_norm": 0.29111397323788823,
"learning_rate": 0.00016058075302499673,
"loss": 1.7724,
"step": 2475
},
{
"epoch": 1.0929925077126488,
"grad_norm": 0.35649745669814514,
"learning_rate": 0.00016037654769431576,
"loss": 1.6527,
"step": 2480
},
{
"epoch": 1.0951961216394888,
"grad_norm": 0.28097148721584675,
"learning_rate": 0.00016017194539718086,
"loss": 1.7563,
"step": 2485
},
{
"epoch": 1.0973997355663287,
"grad_norm": 0.39237884989816185,
"learning_rate": 0.0001599669474788205,
"loss": 1.8656,
"step": 2490
},
{
"epoch": 1.0996033494931687,
"grad_norm": 0.2633010799321111,
"learning_rate": 0.00015976155528706415,
"loss": 1.7129,
"step": 2495
},
{
"epoch": 1.1018069634200087,
"grad_norm": 0.2719172135219212,
"learning_rate": 0.0001595557701723338,
"loss": 1.5688,
"step": 2500
},
{
"epoch": 1.104010577346849,
"grad_norm": 0.2731378192802766,
"learning_rate": 0.00015934959348763467,
"loss": 1.7727,
"step": 2505
},
{
"epoch": 1.106214191273689,
"grad_norm": 0.31808171740983593,
"learning_rate": 0.00015914302658854657,
"loss": 1.8461,
"step": 2510
},
{
"epoch": 1.108417805200529,
"grad_norm": 0.2869427974730242,
"learning_rate": 0.00015893607083321477,
"loss": 1.8664,
"step": 2515
},
{
"epoch": 1.110621419127369,
"grad_norm": 0.2187585282755932,
"learning_rate": 0.00015872872758234148,
"loss": 1.6029,
"step": 2520
},
{
"epoch": 1.1128250330542089,
"grad_norm": 0.2667619562054791,
"learning_rate": 0.00015852099819917639,
"loss": 1.8981,
"step": 2525
},
{
"epoch": 1.1150286469810489,
"grad_norm": 0.3372014415136272,
"learning_rate": 0.00015831288404950802,
"loss": 1.7639,
"step": 2530
},
{
"epoch": 1.1172322609078889,
"grad_norm": 0.3048158336194716,
"learning_rate": 0.0001581043865016547,
"loss": 1.6387,
"step": 2535
},
{
"epoch": 1.1194358748347288,
"grad_norm": 0.2971544124275263,
"learning_rate": 0.00015789550692645556,
"loss": 1.7692,
"step": 2540
},
{
"epoch": 1.121639488761569,
"grad_norm": 0.2518439420968902,
"learning_rate": 0.00015768624669726145,
"loss": 1.5533,
"step": 2545
},
{
"epoch": 1.123843102688409,
"grad_norm": 0.28947857100850594,
"learning_rate": 0.00015747660718992598,
"loss": 1.6443,
"step": 2550
},
{
"epoch": 1.126046716615249,
"grad_norm": 0.27744579032481387,
"learning_rate": 0.00015726658978279642,
"loss": 1.7146,
"step": 2555
},
{
"epoch": 1.128250330542089,
"grad_norm": 0.28434343297956843,
"learning_rate": 0.00015705619585670478,
"loss": 1.8686,
"step": 2560
},
{
"epoch": 1.130453944468929,
"grad_norm": 0.26116640553101533,
"learning_rate": 0.00015684542679495847,
"loss": 1.7831,
"step": 2565
},
{
"epoch": 1.132657558395769,
"grad_norm": 0.31380511545232626,
"learning_rate": 0.00015663428398333157,
"loss": 1.6778,
"step": 2570
},
{
"epoch": 1.134861172322609,
"grad_norm": 0.2747114435356579,
"learning_rate": 0.0001564227688100552,
"loss": 1.6324,
"step": 2575
},
{
"epoch": 1.1370647862494492,
"grad_norm": 0.2678076483406117,
"learning_rate": 0.00015621088266580904,
"loss": 1.4946,
"step": 2580
},
{
"epoch": 1.1392684001762892,
"grad_norm": 0.33262666113926864,
"learning_rate": 0.00015599862694371157,
"loss": 1.882,
"step": 2585
},
{
"epoch": 1.1414720141031292,
"grad_norm": 0.30861283361311576,
"learning_rate": 0.00015578600303931136,
"loss": 1.6738,
"step": 2590
},
{
"epoch": 1.1436756280299691,
"grad_norm": 0.30927635505724477,
"learning_rate": 0.00015557301235057767,
"loss": 1.7005,
"step": 2595
},
{
"epoch": 1.1458792419568091,
"grad_norm": 0.28688292761286854,
"learning_rate": 0.00015535965627789126,
"loss": 1.6462,
"step": 2600
},
{
"epoch": 1.1480828558836491,
"grad_norm": 0.27171604870426835,
"learning_rate": 0.00015514593622403532,
"loss": 1.585,
"step": 2605
},
{
"epoch": 1.150286469810489,
"grad_norm": 0.3092542662127764,
"learning_rate": 0.0001549318535941861,
"loss": 1.9096,
"step": 2610
},
{
"epoch": 1.1524900837373293,
"grad_norm": 0.2716117253477657,
"learning_rate": 0.00015471740979590377,
"loss": 1.7352,
"step": 2615
},
{
"epoch": 1.1546936976641693,
"grad_norm": 0.3066935084879198,
"learning_rate": 0.0001545026062391231,
"loss": 1.8141,
"step": 2620
},
{
"epoch": 1.1568973115910093,
"grad_norm": 0.346363671435922,
"learning_rate": 0.00015428744433614415,
"loss": 1.5573,
"step": 2625
},
{
"epoch": 1.1591009255178493,
"grad_norm": 0.29157425354464905,
"learning_rate": 0.00015407192550162318,
"loss": 1.5464,
"step": 2630
},
{
"epoch": 1.1613045394446893,
"grad_norm": 0.2806374114105095,
"learning_rate": 0.0001538560511525632,
"loss": 1.6386,
"step": 2635
},
{
"epoch": 1.1635081533715292,
"grad_norm": 0.2982356223064332,
"learning_rate": 0.0001536398227083046,
"loss": 1.7813,
"step": 2640
},
{
"epoch": 1.1657117672983692,
"grad_norm": 0.33202427377524757,
"learning_rate": 0.00015342324159051587,
"loss": 1.9532,
"step": 2645
},
{
"epoch": 1.1679153812252094,
"grad_norm": 0.33897582151852124,
"learning_rate": 0.00015320630922318444,
"loss": 1.7746,
"step": 2650
},
{
"epoch": 1.1701189951520494,
"grad_norm": 0.3321654404402584,
"learning_rate": 0.00015298902703260692,
"loss": 2.0143,
"step": 2655
},
{
"epoch": 1.1723226090788894,
"grad_norm": 0.21833177585011845,
"learning_rate": 0.0001527713964473802,
"loss": 1.6702,
"step": 2660
},
{
"epoch": 1.1745262230057294,
"grad_norm": 0.26595318714741284,
"learning_rate": 0.00015255341889839157,
"loss": 1.683,
"step": 2665
},
{
"epoch": 1.1767298369325694,
"grad_norm": 0.2880553889858748,
"learning_rate": 0.00015233509581880973,
"loss": 1.6248,
"step": 2670
},
{
"epoch": 1.1789334508594094,
"grad_norm": 0.2966201251141622,
"learning_rate": 0.0001521164286440751,
"loss": 1.5788,
"step": 2675
},
{
"epoch": 1.1811370647862494,
"grad_norm": 0.32819026600076084,
"learning_rate": 0.00015189741881189054,
"loss": 1.6132,
"step": 2680
},
{
"epoch": 1.1833406787130896,
"grad_norm": 0.25598509051489227,
"learning_rate": 0.00015167806776221178,
"loss": 1.7194,
"step": 2685
},
{
"epoch": 1.1855442926399296,
"grad_norm": 0.32216524743892266,
"learning_rate": 0.000151458376937238,
"loss": 1.6499,
"step": 2690
},
{
"epoch": 1.1877479065667695,
"grad_norm": 0.25107175983686675,
"learning_rate": 0.00015123834778140233,
"loss": 1.6059,
"step": 2695
},
{
"epoch": 1.1899515204936095,
"grad_norm": 0.26100544568346645,
"learning_rate": 0.00015101798174136247,
"loss": 1.7677,
"step": 2700
},
{
"epoch": 1.1921551344204495,
"grad_norm": 0.28164900490660866,
"learning_rate": 0.000150797280265991,
"loss": 1.6141,
"step": 2705
},
{
"epoch": 1.1943587483472895,
"grad_norm": 0.25606567943961145,
"learning_rate": 0.00015057624480636594,
"loss": 1.6868,
"step": 2710
},
{
"epoch": 1.1965623622741295,
"grad_norm": 0.30536561643444826,
"learning_rate": 0.0001503548768157612,
"loss": 1.515,
"step": 2715
},
{
"epoch": 1.1987659762009697,
"grad_norm": 0.2495814195185963,
"learning_rate": 0.00015013317774963708,
"loss": 1.5754,
"step": 2720
},
{
"epoch": 1.2009695901278097,
"grad_norm": 0.3292329862356163,
"learning_rate": 0.00014991114906563055,
"loss": 1.6599,
"step": 2725
},
{
"epoch": 1.2031732040546497,
"grad_norm": 0.28736997494401945,
"learning_rate": 0.00014968879222354597,
"loss": 1.6939,
"step": 2730
},
{
"epoch": 1.2053768179814897,
"grad_norm": 0.28066545139959265,
"learning_rate": 0.00014946610868534502,
"loss": 1.5954,
"step": 2735
},
{
"epoch": 1.2075804319083296,
"grad_norm": 0.27963157670324124,
"learning_rate": 0.00014924309991513757,
"loss": 1.6816,
"step": 2740
},
{
"epoch": 1.2097840458351696,
"grad_norm": 0.3074722093066757,
"learning_rate": 0.0001490197673791717,
"loss": 1.5102,
"step": 2745
},
{
"epoch": 1.2119876597620096,
"grad_norm": 0.5175039724428968,
"learning_rate": 0.00014879611254582428,
"loss": 1.8587,
"step": 2750
},
{
"epoch": 1.2141912736888498,
"grad_norm": 0.4607644456138267,
"learning_rate": 0.00014857213688559124,
"loss": 1.8861,
"step": 2755
},
{
"epoch": 1.2163948876156898,
"grad_norm": 0.203471979843397,
"learning_rate": 0.00014834784187107785,
"loss": 1.5549,
"step": 2760
},
{
"epoch": 1.2185985015425298,
"grad_norm": 0.27836500512738993,
"learning_rate": 0.00014812322897698912,
"loss": 1.6677,
"step": 2765
},
{
"epoch": 1.2208021154693698,
"grad_norm": 0.2681432382916657,
"learning_rate": 0.00014789829968012,
"loss": 1.8601,
"step": 2770
},
{
"epoch": 1.2230057293962098,
"grad_norm": 0.24822408823990583,
"learning_rate": 0.00014767305545934588,
"loss": 1.9008,
"step": 2775
},
{
"epoch": 1.2252093433230498,
"grad_norm": 0.26961514947075566,
"learning_rate": 0.00014744749779561258,
"loss": 1.7573,
"step": 2780
},
{
"epoch": 1.2274129572498897,
"grad_norm": 0.40650037835816966,
"learning_rate": 0.0001472216281719269,
"loss": 1.6177,
"step": 2785
},
{
"epoch": 1.22961657117673,
"grad_norm": 0.27436209441284687,
"learning_rate": 0.0001469954480733465,
"loss": 1.6021,
"step": 2790
},
{
"epoch": 1.23182018510357,
"grad_norm": 0.2563256590060921,
"learning_rate": 0.00014676895898697062,
"loss": 1.9842,
"step": 2795
},
{
"epoch": 1.23402379903041,
"grad_norm": 0.35591941682342815,
"learning_rate": 0.00014654216240192995,
"loss": 1.6028,
"step": 2800
},
{
"epoch": 1.23622741295725,
"grad_norm": 0.33349441263276575,
"learning_rate": 0.00014631505980937688,
"loss": 1.852,
"step": 2805
},
{
"epoch": 1.23843102688409,
"grad_norm": 0.31528740240587627,
"learning_rate": 0.0001460876527024758,
"loss": 1.5587,
"step": 2810
},
{
"epoch": 1.24063464081093,
"grad_norm": 0.25877213395041015,
"learning_rate": 0.00014585994257639324,
"loss": 1.5482,
"step": 2815
},
{
"epoch": 1.2428382547377699,
"grad_norm": 0.2910006006185105,
"learning_rate": 0.00014563193092828803,
"loss": 1.6998,
"step": 2820
},
{
"epoch": 1.24504186866461,
"grad_norm": 0.37486054420050446,
"learning_rate": 0.00014540361925730147,
"loss": 1.8516,
"step": 2825
},
{
"epoch": 1.24724548259145,
"grad_norm": 0.34335913235176224,
"learning_rate": 0.00014517500906454742,
"loss": 1.6384,
"step": 2830
},
{
"epoch": 1.24944909651829,
"grad_norm": 0.2930060418053195,
"learning_rate": 0.00014494610185310252,
"loss": 1.8508,
"step": 2835
},
{
"epoch": 1.25165271044513,
"grad_norm": 0.27045205482633095,
"learning_rate": 0.00014471689912799626,
"loss": 1.5935,
"step": 2840
},
{
"epoch": 1.25385632437197,
"grad_norm": 0.30870157000744336,
"learning_rate": 0.00014448740239620108,
"loss": 1.8287,
"step": 2845
},
{
"epoch": 1.25605993829881,
"grad_norm": 0.27087387170107313,
"learning_rate": 0.00014425761316662241,
"loss": 1.9209,
"step": 2850
},
{
"epoch": 1.25826355222565,
"grad_norm": 0.32362518237545235,
"learning_rate": 0.0001440275329500889,
"loss": 1.7297,
"step": 2855
},
{
"epoch": 1.2604671661524902,
"grad_norm": 0.3473479960796504,
"learning_rate": 0.00014379716325934236,
"loss": 1.7847,
"step": 2860
},
{
"epoch": 1.26267078007933,
"grad_norm": 0.2985625193084418,
"learning_rate": 0.0001435665056090278,
"loss": 1.7811,
"step": 2865
},
{
"epoch": 1.2648743940061702,
"grad_norm": 0.2549368072217001,
"learning_rate": 0.00014333556151568364,
"loss": 1.8424,
"step": 2870
},
{
"epoch": 1.2670780079330102,
"grad_norm": 0.3031064940015495,
"learning_rate": 0.00014310433249773146,
"loss": 1.8502,
"step": 2875
},
{
"epoch": 1.2692816218598502,
"grad_norm": 0.2398504180714546,
"learning_rate": 0.00014287282007546627,
"loss": 1.648,
"step": 2880
},
{
"epoch": 1.2714852357866901,
"grad_norm": 0.33873410733492354,
"learning_rate": 0.00014264102577104645,
"loss": 1.6617,
"step": 2885
},
{
"epoch": 1.2736888497135301,
"grad_norm": 0.2655552963090036,
"learning_rate": 0.00014240895110848365,
"loss": 1.7205,
"step": 2890
},
{
"epoch": 1.2758924636403703,
"grad_norm": 0.30714278749605195,
"learning_rate": 0.0001421765976136328,
"loss": 1.7343,
"step": 2895
},
{
"epoch": 1.27809607756721,
"grad_norm": 0.31410869299454564,
"learning_rate": 0.0001419439668141822,
"loss": 1.7369,
"step": 2900
},
{
"epoch": 1.2802996914940503,
"grad_norm": 0.29231072094243427,
"learning_rate": 0.0001417110602396434,
"loss": 1.5317,
"step": 2905
},
{
"epoch": 1.2825033054208903,
"grad_norm": 0.26156390622111436,
"learning_rate": 0.00014147787942134089,
"loss": 1.4907,
"step": 2910
},
{
"epoch": 1.2847069193477303,
"grad_norm": 0.2432071100976,
"learning_rate": 0.00014124442589240265,
"loss": 1.7181,
"step": 2915
},
{
"epoch": 1.2869105332745703,
"grad_norm": 0.28813004538064096,
"learning_rate": 0.00014101070118774936,
"loss": 1.7243,
"step": 2920
},
{
"epoch": 1.2891141472014103,
"grad_norm": 0.29339284132217475,
"learning_rate": 0.00014077670684408485,
"loss": 1.7679,
"step": 2925
},
{
"epoch": 1.2913177611282503,
"grad_norm": 0.2588877094640161,
"learning_rate": 0.00014054244439988566,
"loss": 1.72,
"step": 2930
},
{
"epoch": 1.2935213750550902,
"grad_norm": 0.3011003249431555,
"learning_rate": 0.0001403079153953911,
"loss": 1.879,
"step": 2935
},
{
"epoch": 1.2957249889819304,
"grad_norm": 0.29129639306491034,
"learning_rate": 0.00014007312137259307,
"loss": 1.7124,
"step": 2940
},
{
"epoch": 1.2979286029087704,
"grad_norm": 0.30248111079542994,
"learning_rate": 0.00013983806387522592,
"loss": 1.5669,
"step": 2945
},
{
"epoch": 1.3001322168356104,
"grad_norm": 0.39466981086485026,
"learning_rate": 0.00013960274444875628,
"loss": 1.7579,
"step": 2950
},
{
"epoch": 1.3023358307624504,
"grad_norm": 0.30172725615707247,
"learning_rate": 0.000139367164640373,
"loss": 1.8218,
"step": 2955
},
{
"epoch": 1.3045394446892904,
"grad_norm": 0.286453848416204,
"learning_rate": 0.00013913132599897683,
"loss": 1.9354,
"step": 2960
},
{
"epoch": 1.3067430586161304,
"grad_norm": 0.3902213358624171,
"learning_rate": 0.00013889523007517028,
"loss": 1.7235,
"step": 2965
},
{
"epoch": 1.3089466725429704,
"grad_norm": 0.2589601846721671,
"learning_rate": 0.00013865887842124755,
"loss": 1.5088,
"step": 2970
},
{
"epoch": 1.3111502864698106,
"grad_norm": 0.2838598126846581,
"learning_rate": 0.0001384222725911842,
"loss": 1.6694,
"step": 2975
},
{
"epoch": 1.3133539003966506,
"grad_norm": 0.2985112357585295,
"learning_rate": 0.00013818541414062683,
"loss": 1.8195,
"step": 2980
},
{
"epoch": 1.3155575143234906,
"grad_norm": 0.344496826298518,
"learning_rate": 0.0001379483046268832,
"loss": 1.7105,
"step": 2985
},
{
"epoch": 1.3177611282503305,
"grad_norm": 0.2832321651335367,
"learning_rate": 0.00013771094560891155,
"loss": 1.6398,
"step": 2990
},
{
"epoch": 1.3199647421771705,
"grad_norm": 0.3166967200246273,
"learning_rate": 0.00013747333864731073,
"loss": 1.8804,
"step": 2995
},
{
"epoch": 1.3221683561040105,
"grad_norm": 0.29889345173462817,
"learning_rate": 0.00013723548530430974,
"loss": 1.5327,
"step": 3000
},
{
"epoch": 1.3243719700308505,
"grad_norm": 0.2989561597186618,
"learning_rate": 0.00013699738714375748,
"loss": 1.8312,
"step": 3005
},
{
"epoch": 1.3265755839576907,
"grad_norm": 0.27767734489817053,
"learning_rate": 0.00013675904573111247,
"loss": 1.7797,
"step": 3010
},
{
"epoch": 1.3287791978845307,
"grad_norm": 0.3263812983049982,
"learning_rate": 0.00013652046263343262,
"loss": 1.7061,
"step": 3015
},
{
"epoch": 1.3309828118113707,
"grad_norm": 0.24589188706441673,
"learning_rate": 0.00013628163941936485,
"loss": 1.7644,
"step": 3020
},
{
"epoch": 1.3331864257382107,
"grad_norm": 0.277800302556096,
"learning_rate": 0.00013604257765913484,
"loss": 1.8151,
"step": 3025
},
{
"epoch": 1.3353900396650507,
"grad_norm": 0.23353363973323982,
"learning_rate": 0.0001358032789245366,
"loss": 1.7236,
"step": 3030
},
{
"epoch": 1.3375936535918906,
"grad_norm": 0.34178889147907426,
"learning_rate": 0.00013556374478892232,
"loss": 1.7669,
"step": 3035
},
{
"epoch": 1.3397972675187306,
"grad_norm": 0.28321134962453065,
"learning_rate": 0.00013532397682719185,
"loss": 1.6165,
"step": 3040
},
{
"epoch": 1.3420008814455708,
"grad_norm": 0.3160168756862356,
"learning_rate": 0.00013508397661578242,
"loss": 1.8131,
"step": 3045
},
{
"epoch": 1.3442044953724108,
"grad_norm": 0.2800833181613442,
"learning_rate": 0.0001348437457326582,
"loss": 1.9182,
"step": 3050
},
{
"epoch": 1.3464081092992508,
"grad_norm": 0.2679976561310916,
"learning_rate": 0.00013460328575730019,
"loss": 1.8312,
"step": 3055
},
{
"epoch": 1.3486117232260908,
"grad_norm": 0.2899158112172882,
"learning_rate": 0.00013436259827069534,
"loss": 1.8217,
"step": 3060
},
{
"epoch": 1.3508153371529308,
"grad_norm": 0.26525449374755994,
"learning_rate": 0.00013412168485532676,
"loss": 1.7636,
"step": 3065
},
{
"epoch": 1.3530189510797708,
"grad_norm": 0.2643909185128004,
"learning_rate": 0.00013388054709516272,
"loss": 1.6257,
"step": 3070
},
{
"epoch": 1.3552225650066108,
"grad_norm": 0.3217683133395989,
"learning_rate": 0.0001336391865756468,
"loss": 1.8385,
"step": 3075
},
{
"epoch": 1.357426178933451,
"grad_norm": 0.30256367251501726,
"learning_rate": 0.00013339760488368695,
"loss": 1.5994,
"step": 3080
},
{
"epoch": 1.359629792860291,
"grad_norm": 0.33945781722722157,
"learning_rate": 0.00013315580360764542,
"loss": 1.6502,
"step": 3085
},
{
"epoch": 1.361833406787131,
"grad_norm": 0.2343801676740979,
"learning_rate": 0.00013291378433732818,
"loss": 1.7302,
"step": 3090
},
{
"epoch": 1.364037020713971,
"grad_norm": 0.3789024872984378,
"learning_rate": 0.00013267154866397447,
"loss": 1.9092,
"step": 3095
},
{
"epoch": 1.366240634640811,
"grad_norm": 0.27745717625968813,
"learning_rate": 0.00013242909818024628,
"loss": 1.6587,
"step": 3100
},
{
"epoch": 1.368444248567651,
"grad_norm": 0.26534498515018917,
"learning_rate": 0.0001321864344802181,
"loss": 1.7184,
"step": 3105
},
{
"epoch": 1.3706478624944909,
"grad_norm": 0.25912670390615655,
"learning_rate": 0.00013194355915936611,
"loss": 1.7708,
"step": 3110
},
{
"epoch": 1.372851476421331,
"grad_norm": 0.23803080715278022,
"learning_rate": 0.000131700473814558,
"loss": 1.7224,
"step": 3115
},
{
"epoch": 1.375055090348171,
"grad_norm": 0.30746363012665606,
"learning_rate": 0.00013145718004404223,
"loss": 1.754,
"step": 3120
},
{
"epoch": 1.377258704275011,
"grad_norm": 0.2672636477164184,
"learning_rate": 0.00013121367944743777,
"loss": 1.6989,
"step": 3125
},
{
"epoch": 1.379462318201851,
"grad_norm": 0.28413083026015534,
"learning_rate": 0.0001309699736257232,
"loss": 1.6421,
"step": 3130
},
{
"epoch": 1.381665932128691,
"grad_norm": 0.3193377255035563,
"learning_rate": 0.00013072606418122667,
"loss": 1.8467,
"step": 3135
},
{
"epoch": 1.383869546055531,
"grad_norm": 0.2833583691387121,
"learning_rate": 0.00013048195271761498,
"loss": 1.6013,
"step": 3140
},
{
"epoch": 1.386073159982371,
"grad_norm": 0.25616581325290944,
"learning_rate": 0.00013023764083988323,
"loss": 1.7542,
"step": 3145
},
{
"epoch": 1.3882767739092112,
"grad_norm": 0.34179796686003233,
"learning_rate": 0.0001299931301543442,
"loss": 1.6674,
"step": 3150
},
{
"epoch": 1.390480387836051,
"grad_norm": 0.2612422912889042,
"learning_rate": 0.00012974842226861773,
"loss": 1.3979,
"step": 3155
},
{
"epoch": 1.3926840017628912,
"grad_norm": 0.30556335921079647,
"learning_rate": 0.0001295035187916204,
"loss": 1.7775,
"step": 3160
},
{
"epoch": 1.3948876156897312,
"grad_norm": 0.34927377227523054,
"learning_rate": 0.00012925842133355454,
"loss": 1.7384,
"step": 3165
},
{
"epoch": 1.3970912296165712,
"grad_norm": 0.35826503954646516,
"learning_rate": 0.00012901313150589806,
"loss": 1.8279,
"step": 3170
},
{
"epoch": 1.3992948435434112,
"grad_norm": 0.3558470664702752,
"learning_rate": 0.0001287676509213936,
"loss": 1.6467,
"step": 3175
},
{
"epoch": 1.4014984574702511,
"grad_norm": 0.2897608843662268,
"learning_rate": 0.00012852198119403798,
"loss": 1.6509,
"step": 3180
},
{
"epoch": 1.4037020713970914,
"grad_norm": 0.32428607006990234,
"learning_rate": 0.00012827612393907163,
"loss": 1.7118,
"step": 3185
},
{
"epoch": 1.4059056853239311,
"grad_norm": 0.29054441869310144,
"learning_rate": 0.0001280300807729679,
"loss": 1.6328,
"step": 3190
},
{
"epoch": 1.4081092992507713,
"grad_norm": 0.2672079347199706,
"learning_rate": 0.0001277838533134226,
"loss": 1.7875,
"step": 3195
},
{
"epoch": 1.4103129131776113,
"grad_norm": 0.3317588448314954,
"learning_rate": 0.00012753744317934307,
"loss": 1.9754,
"step": 3200
},
{
"epoch": 1.4125165271044513,
"grad_norm": 0.30976057441678767,
"learning_rate": 0.0001272908519908379,
"loss": 1.7292,
"step": 3205
},
{
"epoch": 1.4147201410312913,
"grad_norm": 0.2588899590749228,
"learning_rate": 0.00012704408136920585,
"loss": 1.661,
"step": 3210
},
{
"epoch": 1.4169237549581313,
"grad_norm": 0.34042631761749226,
"learning_rate": 0.0001267971329369256,
"loss": 1.7688,
"step": 3215
},
{
"epoch": 1.4191273688849715,
"grad_norm": 0.2613604846991468,
"learning_rate": 0.00012655000831764495,
"loss": 1.7979,
"step": 3220
},
{
"epoch": 1.4213309828118112,
"grad_norm": 0.30387612038339795,
"learning_rate": 0.00012630270913616985,
"loss": 1.6008,
"step": 3225
},
{
"epoch": 1.4235345967386515,
"grad_norm": 0.3245461529092582,
"learning_rate": 0.00012605523701845431,
"loss": 1.7394,
"step": 3230
},
{
"epoch": 1.4257382106654914,
"grad_norm": 0.23964868596701627,
"learning_rate": 0.00012580759359158905,
"loss": 1.5526,
"step": 3235
},
{
"epoch": 1.4279418245923314,
"grad_norm": 0.281150127445612,
"learning_rate": 0.00012555978048379133,
"loss": 1.6581,
"step": 3240
},
{
"epoch": 1.4301454385191714,
"grad_norm": 0.31208534783523834,
"learning_rate": 0.00012531179932439397,
"loss": 1.6698,
"step": 3245
},
{
"epoch": 1.4323490524460114,
"grad_norm": 0.3197645332854783,
"learning_rate": 0.00012506365174383467,
"loss": 1.8493,
"step": 3250
},
{
"epoch": 1.4345526663728516,
"grad_norm": 0.29747281096057276,
"learning_rate": 0.0001248153393736454,
"loss": 1.923,
"step": 3255
},
{
"epoch": 1.4367562802996914,
"grad_norm": 0.2706957926203667,
"learning_rate": 0.00012456686384644148,
"loss": 1.7219,
"step": 3260
},
{
"epoch": 1.4389598942265316,
"grad_norm": 0.3015008988665459,
"learning_rate": 0.00012431822679591112,
"loss": 1.6334,
"step": 3265
},
{
"epoch": 1.4411635081533716,
"grad_norm": 0.28824055515626146,
"learning_rate": 0.00012406942985680437,
"loss": 1.7096,
"step": 3270
},
{
"epoch": 1.4433671220802116,
"grad_norm": 0.28792375010811966,
"learning_rate": 0.00012382047466492262,
"loss": 1.6993,
"step": 3275
},
{
"epoch": 1.4455707360070515,
"grad_norm": 0.2778794221727809,
"learning_rate": 0.0001235713628571077,
"loss": 1.699,
"step": 3280
},
{
"epoch": 1.4477743499338915,
"grad_norm": 0.3173174516544841,
"learning_rate": 0.00012332209607123117,
"loss": 1.6214,
"step": 3285
},
{
"epoch": 1.4499779638607315,
"grad_norm": 0.30655650928697775,
"learning_rate": 0.0001230726759461836,
"loss": 1.7923,
"step": 3290
},
{
"epoch": 1.4521815777875715,
"grad_norm": 0.2517175305379352,
"learning_rate": 0.00012282310412186365,
"loss": 1.8434,
"step": 3295
},
{
"epoch": 1.4543851917144117,
"grad_norm": 0.24920356879351888,
"learning_rate": 0.0001225733822391675,
"loss": 1.6146,
"step": 3300
},
{
"epoch": 1.4565888056412517,
"grad_norm": 0.2950517285567546,
"learning_rate": 0.00012232351193997774,
"loss": 1.6819,
"step": 3305
},
{
"epoch": 1.4587924195680917,
"grad_norm": 0.3058013022960617,
"learning_rate": 0.000122073494867153,
"loss": 1.579,
"step": 3310
},
{
"epoch": 1.4609960334949317,
"grad_norm": 0.46566643268469327,
"learning_rate": 0.00012182333266451684,
"loss": 1.6713,
"step": 3315
},
{
"epoch": 1.4631996474217717,
"grad_norm": 0.3269081430168815,
"learning_rate": 0.00012157302697684695,
"loss": 1.6608,
"step": 3320
},
{
"epoch": 1.4654032613486117,
"grad_norm": 0.2472677464376836,
"learning_rate": 0.00012132257944986454,
"loss": 1.6504,
"step": 3325
},
{
"epoch": 1.4676068752754516,
"grad_norm": 0.3189526409165166,
"learning_rate": 0.00012107199173022327,
"loss": 1.6308,
"step": 3330
},
{
"epoch": 1.4698104892022918,
"grad_norm": 0.2439726953786154,
"learning_rate": 0.00012082126546549864,
"loss": 1.6694,
"step": 3335
},
{
"epoch": 1.4720141031291318,
"grad_norm": 0.28416189235796196,
"learning_rate": 0.000120570402304177,
"loss": 1.9048,
"step": 3340
},
{
"epoch": 1.4742177170559718,
"grad_norm": 0.25976522560441834,
"learning_rate": 0.00012031940389564478,
"loss": 1.7083,
"step": 3345
},
{
"epoch": 1.4764213309828118,
"grad_norm": 0.2874680056323443,
"learning_rate": 0.00012006827189017773,
"loss": 1.7914,
"step": 3350
},
{
"epoch": 1.4786249449096518,
"grad_norm": 0.35212840385267163,
"learning_rate": 0.00011981700793892982,
"loss": 1.8617,
"step": 3355
},
{
"epoch": 1.4808285588364918,
"grad_norm": 0.3258646795205973,
"learning_rate": 0.00011956561369392274,
"loss": 1.8569,
"step": 3360
},
{
"epoch": 1.4830321727633318,
"grad_norm": 0.25990120954046436,
"learning_rate": 0.0001193140908080346,
"loss": 1.7778,
"step": 3365
},
{
"epoch": 1.485235786690172,
"grad_norm": 0.2916499249746569,
"learning_rate": 0.00011906244093498955,
"loss": 1.7442,
"step": 3370
},
{
"epoch": 1.487439400617012,
"grad_norm": 0.38118475856684764,
"learning_rate": 0.00011881066572934644,
"loss": 1.6281,
"step": 3375
},
{
"epoch": 1.489643014543852,
"grad_norm": 0.33602332943649665,
"learning_rate": 0.00011855876684648837,
"loss": 1.6655,
"step": 3380
},
{
"epoch": 1.491846628470692,
"grad_norm": 0.3091891353046593,
"learning_rate": 0.00011830674594261145,
"loss": 1.818,
"step": 3385
},
{
"epoch": 1.494050242397532,
"grad_norm": 0.2675107541956203,
"learning_rate": 0.0001180546046747141,
"loss": 1.9917,
"step": 3390
},
{
"epoch": 1.496253856324372,
"grad_norm": 0.24171145502227592,
"learning_rate": 0.00011780234470058613,
"loss": 1.747,
"step": 3395
},
{
"epoch": 1.498457470251212,
"grad_norm": 0.31043683691075824,
"learning_rate": 0.0001175499676787978,
"loss": 1.7863,
"step": 3400
},
{
"epoch": 1.500661084178052,
"grad_norm": 0.25938236019167105,
"learning_rate": 0.000117297475268689,
"loss": 1.6216,
"step": 3405
},
{
"epoch": 1.5028646981048919,
"grad_norm": 0.2889407665894309,
"learning_rate": 0.00011704486913035819,
"loss": 1.7023,
"step": 3410
},
{
"epoch": 1.505068312031732,
"grad_norm": 0.2595635779433101,
"learning_rate": 0.00011679215092465163,
"loss": 1.6651,
"step": 3415
},
{
"epoch": 1.507271925958572,
"grad_norm": 0.3147360748379521,
"learning_rate": 0.00011653932231315245,
"loss": 1.6855,
"step": 3420
},
{
"epoch": 1.509475539885412,
"grad_norm": 0.2456935617451673,
"learning_rate": 0.00011628638495816955,
"loss": 1.6982,
"step": 3425
},
{
"epoch": 1.511679153812252,
"grad_norm": 0.2644287001406921,
"learning_rate": 0.00011603334052272696,
"loss": 1.7438,
"step": 3430
},
{
"epoch": 1.513882767739092,
"grad_norm": 0.3098487244790836,
"learning_rate": 0.0001157801906705526,
"loss": 1.7459,
"step": 3435
},
{
"epoch": 1.5160863816659322,
"grad_norm": 0.2635907160016382,
"learning_rate": 0.00011552693706606758,
"loss": 1.5969,
"step": 3440
},
{
"epoch": 1.518289995592772,
"grad_norm": 0.293285003547433,
"learning_rate": 0.00011527358137437516,
"loss": 1.7899,
"step": 3445
},
{
"epoch": 1.5204936095196122,
"grad_norm": 0.3154800148422279,
"learning_rate": 0.00011502012526124978,
"loss": 1.7859,
"step": 3450
},
{
"epoch": 1.5226972234464522,
"grad_norm": 0.3029474817652635,
"learning_rate": 0.00011476657039312613,
"loss": 1.8433,
"step": 3455
},
{
"epoch": 1.5249008373732922,
"grad_norm": 0.3394061586641444,
"learning_rate": 0.00011451291843708824,
"loss": 1.8191,
"step": 3460
},
{
"epoch": 1.5271044513001322,
"grad_norm": 0.28588437415991474,
"learning_rate": 0.00011425917106085844,
"loss": 1.6528,
"step": 3465
},
{
"epoch": 1.5293080652269722,
"grad_norm": 0.2624098766559022,
"learning_rate": 0.00011400532993278643,
"loss": 1.8208,
"step": 3470
},
{
"epoch": 1.5315116791538124,
"grad_norm": 0.29050819773398057,
"learning_rate": 0.00011375139672183834,
"loss": 1.763,
"step": 3475
},
{
"epoch": 1.5337152930806521,
"grad_norm": 0.28595831334373306,
"learning_rate": 0.00011349737309758572,
"loss": 1.6389,
"step": 3480
},
{
"epoch": 1.5359189070074923,
"grad_norm": 0.310106838673627,
"learning_rate": 0.00011324326073019458,
"loss": 1.7008,
"step": 3485
},
{
"epoch": 1.5381225209343323,
"grad_norm": 0.3425497639069633,
"learning_rate": 0.0001129890612904144,
"loss": 1.7975,
"step": 3490
},
{
"epoch": 1.5403261348611723,
"grad_norm": 0.3049155101860015,
"learning_rate": 0.0001127347764495671,
"loss": 1.6302,
"step": 3495
},
{
"epoch": 1.5425297487880123,
"grad_norm": 0.3083723366063809,
"learning_rate": 0.00011248040787953622,
"loss": 1.8779,
"step": 3500
},
{
"epoch": 1.5447333627148523,
"grad_norm": 0.2714184317474351,
"learning_rate": 0.00011222595725275562,
"loss": 1.6655,
"step": 3505
},
{
"epoch": 1.5469369766416925,
"grad_norm": 0.3020528286222207,
"learning_rate": 0.00011197142624219887,
"loss": 1.5374,
"step": 3510
},
{
"epoch": 1.5491405905685323,
"grad_norm": 0.270325291856936,
"learning_rate": 0.00011171681652136793,
"loss": 1.7442,
"step": 3515
},
{
"epoch": 1.5513442044953725,
"grad_norm": 0.2718748140775875,
"learning_rate": 0.00011146212976428232,
"loss": 1.793,
"step": 3520
},
{
"epoch": 1.5535478184222125,
"grad_norm": 0.2684213723870114,
"learning_rate": 0.00011120736764546799,
"loss": 1.5847,
"step": 3525
},
{
"epoch": 1.5557514323490524,
"grad_norm": 0.3293563453835575,
"learning_rate": 0.00011095253183994645,
"loss": 1.5808,
"step": 3530
},
{
"epoch": 1.5579550462758924,
"grad_norm": 0.32122911366332685,
"learning_rate": 0.0001106976240232237,
"loss": 1.7343,
"step": 3535
},
{
"epoch": 1.5601586602027324,
"grad_norm": 0.31939212525307864,
"learning_rate": 0.0001104426458712791,
"loss": 1.7123,
"step": 3540
},
{
"epoch": 1.5623622741295726,
"grad_norm": 0.2676344457188956,
"learning_rate": 0.00011018759906055463,
"loss": 1.4029,
"step": 3545
},
{ |
|
"epoch": 1.5645658880564124, |
|
"grad_norm": 0.33141673784681086, |
|
"learning_rate": 0.00010993248526794347, |
|
"loss": 1.8105, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5667695019832526, |
|
"grad_norm": 0.2715916392293134, |
|
"learning_rate": 0.00010967730617077938, |
|
"loss": 1.73, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.5689731159100926, |
|
"grad_norm": 0.2615038650065928, |
|
"learning_rate": 0.00010942206344682541, |
|
"loss": 1.7547, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5711767298369326, |
|
"grad_norm": 0.2662856667093564, |
|
"learning_rate": 0.00010916675877426296, |
|
"loss": 1.6934, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.5733803437637726, |
|
"grad_norm": 0.20249417258651908, |
|
"learning_rate": 0.00010891139383168072, |
|
"loss": 1.7876, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.5755839576906125, |
|
"grad_norm": 0.30704028921333987, |
|
"learning_rate": 0.00010865597029806365, |
|
"loss": 1.7228, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.5777875716174528, |
|
"grad_norm": 0.32055060377455574, |
|
"learning_rate": 0.00010840048985278195, |
|
"loss": 1.7169, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.5799911855442925, |
|
"grad_norm": 0.31196324685842286, |
|
"learning_rate": 0.00010814495417557997, |
|
"loss": 1.875, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.5821947994711327, |
|
"grad_norm": 0.3005168077317045, |
|
"learning_rate": 0.00010788936494656523, |
|
"loss": 1.8862, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.5843984133979727, |
|
"grad_norm": 0.2856041438770126, |
|
"learning_rate": 0.00010763372384619738, |
|
"loss": 1.6419, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.5866020273248127, |
|
"grad_norm": 0.27459499931453724, |
|
"learning_rate": 0.00010737803255527702, |
|
"loss": 1.7495, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5888056412516527, |
|
"grad_norm": 0.23142425230470423, |
|
"learning_rate": 0.00010712229275493489, |
|
"loss": 1.7615, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.5910092551784927, |
|
"grad_norm": 0.26704359594443944, |
|
"learning_rate": 0.00010686650612662048, |
|
"loss": 1.7043, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.5932128691053329, |
|
"grad_norm": 0.2644858878666521, |
|
"learning_rate": 0.00010661067435209135, |
|
"loss": 1.8665, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.5954164830321726, |
|
"grad_norm": 0.306618968505366, |
|
"learning_rate": 0.00010635479911340176, |
|
"loss": 1.8191, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.5976200969590129, |
|
"grad_norm": 0.32629807608957256, |
|
"learning_rate": 0.00010609888209289183, |
|
"loss": 1.781, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.5998237108858528, |
|
"grad_norm": 0.2701943161629176, |
|
"learning_rate": 0.00010584292497317633, |
|
"loss": 1.6162, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.6020273248126928, |
|
"grad_norm": 0.40336230029221404, |
|
"learning_rate": 0.00010558692943713373, |
|
"loss": 1.722, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.6042309387395328, |
|
"grad_norm": 0.2767732465609723, |
|
"learning_rate": 0.000105330897167895, |
|
"loss": 1.6427, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.6064345526663728, |
|
"grad_norm": 0.28371049131821663, |
|
"learning_rate": 0.00010507482984883268, |
|
"loss": 1.6872, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.608638166593213, |
|
"grad_norm": 0.2975217481512648, |
|
"learning_rate": 0.00010481872916354978, |
|
"loss": 1.6807, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.6108417805200528, |
|
"grad_norm": 0.30522288055794994, |
|
"learning_rate": 0.00010456259679586862, |
|
"loss": 1.6253, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.613045394446893, |
|
"grad_norm": 0.2893887034344458, |
|
"learning_rate": 0.00010430643442981986, |
|
"loss": 1.6465, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.615249008373733, |
|
"grad_norm": 0.28628624467329145, |
|
"learning_rate": 0.0001040502437496315, |
|
"loss": 1.6428, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.617452622300573, |
|
"grad_norm": 0.31683631498188874, |
|
"learning_rate": 0.00010379402643971746, |
|
"loss": 1.7033, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.619656236227413, |
|
"grad_norm": 0.2627512390977551, |
|
"learning_rate": 0.00010353778418466697, |
|
"loss": 1.8805, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.621859850154253, |
|
"grad_norm": 0.2976244892863047, |
|
"learning_rate": 0.00010328151866923316, |
|
"loss": 1.8013, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.6240634640810931, |
|
"grad_norm": 0.31860629887164105, |
|
"learning_rate": 0.00010302523157832216, |
|
"loss": 1.648, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.626267078007933, |
|
"grad_norm": 0.331987729083311, |
|
"learning_rate": 0.00010276892459698182, |
|
"loss": 1.6325, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.6284706919347731, |
|
"grad_norm": 0.2818827039809831, |
|
"learning_rate": 0.00010251259941039098, |
|
"loss": 1.7213, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.6306743058616129, |
|
"grad_norm": 0.2511192996283762, |
|
"learning_rate": 0.00010225625770384797, |
|
"loss": 1.5629, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.632877919788453, |
|
"grad_norm": 0.29741607234777423, |
|
"learning_rate": 0.00010199990116275988, |
|
"loss": 1.7834, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.635081533715293, |
|
"grad_norm": 0.30036469948366823, |
|
"learning_rate": 0.00010174353147263125, |
|
"loss": 1.4849, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.637285147642133, |
|
"grad_norm": 0.30522753547462433, |
|
"learning_rate": 0.00010148715031905312, |
|
"loss": 1.8071, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.6394887615689733, |
|
"grad_norm": 0.2769872314517015, |
|
"learning_rate": 0.00010123075938769187, |
|
"loss": 1.685, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.641692375495813, |
|
"grad_norm": 0.2709018320510704, |
|
"learning_rate": 0.00010097436036427816, |
|
"loss": 1.7853, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.6438959894226532, |
|
"grad_norm": 0.3446719925528754, |
|
"learning_rate": 0.00010071795493459591, |
|
"loss": 1.7783, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.646099603349493, |
|
"grad_norm": 0.3208180036756325, |
|
"learning_rate": 0.00010046154478447114, |
|
"loss": 1.8982, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.6483032172763332, |
|
"grad_norm": 0.289239117609306, |
|
"learning_rate": 0.00010020513159976084, |
|
"loss": 1.7313, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.6505068312031732, |
|
"grad_norm": 0.30122205784761724, |
|
"learning_rate": 9.994871706634204e-05, |
|
"loss": 1.6831, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.6527104451300132, |
|
"grad_norm": 0.2835725442151606, |
|
"learning_rate": 9.96923028701006e-05, |
|
"loss": 1.6129, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6549140590568534, |
|
"grad_norm": 0.4849263642027594, |
|
"learning_rate": 9.943589069692014e-05, |
|
"loss": 1.783, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.6571176729836932, |
|
"grad_norm": 0.34060901148734135, |
|
"learning_rate": 9.917948223267105e-05, |
|
"loss": 1.642, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6593212869105334, |
|
"grad_norm": 0.3001957536668968, |
|
"learning_rate": 9.892307916319919e-05, |
|
"loss": 1.6005, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.6615249008373731, |
|
"grad_norm": 0.3141764748091563, |
|
"learning_rate": 9.866668317431514e-05, |
|
"loss": 1.8968, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.6637285147642134, |
|
"grad_norm": 0.3066452663914771, |
|
"learning_rate": 9.841029595178282e-05, |
|
"loss": 1.6288, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.6659321286910533, |
|
"grad_norm": 0.2737312482046674, |
|
"learning_rate": 9.815391918130848e-05, |
|
"loss": 1.6151, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.6681357426178933, |
|
"grad_norm": 0.337368580313636, |
|
"learning_rate": 9.789755454852971e-05, |
|
"loss": 1.6298, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.6703393565447333, |
|
"grad_norm": 0.22864544249672591, |
|
"learning_rate": 9.764120373900436e-05, |
|
"loss": 1.7166, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.6725429704715733, |
|
"grad_norm": 0.3199596986088049, |
|
"learning_rate": 9.738486843819919e-05, |
|
"loss": 1.4291, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.6747465843984135, |
|
"grad_norm": 0.484113228627448, |
|
"learning_rate": 9.712855033147921e-05, |
|
"loss": 1.8267, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6769501983252533, |
|
"grad_norm": 0.29066627384947913, |
|
"learning_rate": 9.68722511040962e-05, |
|
"loss": 1.7618, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.6791538122520935, |
|
"grad_norm": 0.2756578416564992, |
|
"learning_rate": 9.661597244117802e-05, |
|
"loss": 1.7626, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.6813574261789335, |
|
"grad_norm": 0.26262162877838774, |
|
"learning_rate": 9.635971602771716e-05, |
|
"loss": 1.6437, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.6835610401057735, |
|
"grad_norm": 0.3485930873206027, |
|
"learning_rate": 9.61034835485598e-05, |
|
"loss": 1.9054, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.6857646540326134, |
|
"grad_norm": 0.347720471228885, |
|
"learning_rate": 9.584727668839487e-05, |
|
"loss": 1.6653, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.6879682679594534, |
|
"grad_norm": 0.259896835339376, |
|
"learning_rate": 9.559109713174282e-05, |
|
"loss": 1.8298, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.6901718818862936, |
|
"grad_norm": 0.27688333027135853, |
|
"learning_rate": 9.533494656294458e-05, |
|
"loss": 1.5074, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.6923754958131334, |
|
"grad_norm": 0.3186540043438054, |
|
"learning_rate": 9.507882666615049e-05, |
|
"loss": 1.666, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.6945791097399736, |
|
"grad_norm": 0.34137430082378867, |
|
"learning_rate": 9.482273912530913e-05, |
|
"loss": 1.7862, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 1.6967827236668136, |
|
"grad_norm": 0.31312228735788816, |
|
"learning_rate": 9.456668562415657e-05, |
|
"loss": 1.6969, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6989863375936536, |
|
"grad_norm": 0.3211377859121694, |
|
"learning_rate": 9.431066784620486e-05, |
|
"loss": 1.8653, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 1.7011899515204936, |
|
"grad_norm": 0.3443229930133863, |
|
"learning_rate": 9.405468747473127e-05, |
|
"loss": 1.696, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.7033935654473336, |
|
"grad_norm": 0.2900871769643087, |
|
"learning_rate": 9.379874619276707e-05, |
|
"loss": 1.7654, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 1.7055971793741738, |
|
"grad_norm": 0.3109956258190758, |
|
"learning_rate": 9.354284568308665e-05, |
|
"loss": 1.6229, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.7078007933010135, |
|
"grad_norm": 0.29096423007153205, |
|
"learning_rate": 9.328698762819623e-05, |
|
"loss": 1.7274, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.7100044072278537, |
|
"grad_norm": 0.2674081631314747, |
|
"learning_rate": 9.303117371032284e-05, |
|
"loss": 1.7598, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.7122080211546937, |
|
"grad_norm": 0.3370488411342742, |
|
"learning_rate": 9.277540561140342e-05, |
|
"loss": 1.7854, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 1.7144116350815337, |
|
"grad_norm": 0.2236164025203487, |
|
"learning_rate": 9.251968501307365e-05, |
|
"loss": 1.735, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.7166152490083737, |
|
"grad_norm": 0.31253584348251195, |
|
"learning_rate": 9.226401359665686e-05, |
|
"loss": 1.5912, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 1.7188188629352137, |
|
"grad_norm": 0.28523913611604496, |
|
"learning_rate": 9.2008393043153e-05, |
|
"loss": 1.7252, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.721022476862054, |
|
"grad_norm": 0.3641271132364838, |
|
"learning_rate": 9.17528250332277e-05, |
|
"loss": 1.7001, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 1.7232260907888937, |
|
"grad_norm": 0.34337087912187736, |
|
"learning_rate": 9.149731124720104e-05, |
|
"loss": 1.941, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.7254297047157339, |
|
"grad_norm": 0.3388940885148476, |
|
"learning_rate": 9.124185336503656e-05, |
|
"loss": 1.751, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 1.7276333186425739, |
|
"grad_norm": 0.3980423482061792, |
|
"learning_rate": 9.098645306633029e-05, |
|
"loss": 1.7571, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.7298369325694138, |
|
"grad_norm": 0.28139197000268995, |
|
"learning_rate": 9.073111203029972e-05, |
|
"loss": 1.5225, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.7320405464962538, |
|
"grad_norm": 0.28174263964005763, |
|
"learning_rate": 9.04758319357726e-05, |
|
"loss": 1.6746, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.7342441604230938, |
|
"grad_norm": 0.2461518030614674, |
|
"learning_rate": 9.0220614461176e-05, |
|
"loss": 1.5447, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 1.736447774349934, |
|
"grad_norm": 0.3297581828947644, |
|
"learning_rate": 8.99654612845253e-05, |
|
"loss": 1.5802, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.7386513882767738, |
|
"grad_norm": 0.3434764691584567, |
|
"learning_rate": 8.971037408341319e-05, |
|
"loss": 1.6836, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 1.740855002203614, |
|
"grad_norm": 0.2967197456557559, |
|
"learning_rate": 8.94553545349985e-05, |
|
"loss": 1.6141, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.743058616130454, |
|
"grad_norm": 0.3235657879123794, |
|
"learning_rate": 8.92004043159953e-05, |
|
"loss": 1.8539, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 1.745262230057294, |
|
"grad_norm": 0.30230482414452203, |
|
"learning_rate": 8.894552510266172e-05, |
|
"loss": 1.6447, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.747465843984134, |
|
"grad_norm": 0.25937552544500664, |
|
"learning_rate": 8.869071857078926e-05, |
|
"loss": 1.7132, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 1.749669457910974, |
|
"grad_norm": 0.3504915271341817, |
|
"learning_rate": 8.843598639569134e-05, |
|
"loss": 1.616, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.7518730718378142, |
|
"grad_norm": 0.3391539087204084, |
|
"learning_rate": 8.818133025219258e-05, |
|
"loss": 1.7726, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.754076685764654, |
|
"grad_norm": 0.2945081027363211, |
|
"learning_rate": 8.79267518146177e-05, |
|
"loss": 1.6354, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.7562802996914941, |
|
"grad_norm": 0.3122456807955038, |
|
"learning_rate": 8.767225275678054e-05, |
|
"loss": 1.7703, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 1.7584839136183341, |
|
"grad_norm": 0.2911403528384371, |
|
"learning_rate": 8.741783475197301e-05, |
|
"loss": 1.6184, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.760687527545174, |
|
"grad_norm": 0.28063561583988544, |
|
"learning_rate": 8.716349947295406e-05, |
|
"loss": 1.6723, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 1.762891141472014, |
|
"grad_norm": 0.24899262558923158, |
|
"learning_rate": 8.690924859193877e-05, |
|
"loss": 1.5999, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.765094755398854, |
|
"grad_norm": 0.21950111519024182, |
|
"learning_rate": 8.665508378058737e-05, |
|
"loss": 1.59, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.7672983693256943, |
|
"grad_norm": 0.2812929263006604, |
|
"learning_rate": 8.640100670999413e-05, |
|
"loss": 1.7219, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.769501983252534, |
|
"grad_norm": 0.26800092694380395, |
|
"learning_rate": 8.614701905067648e-05, |
|
"loss": 1.5878, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 1.7717055971793743, |
|
"grad_norm": 0.3421307184861735, |
|
"learning_rate": 8.589312247256385e-05, |
|
"loss": 1.5289, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.7739092111062142, |
|
"grad_norm": 0.2759648702531539, |
|
"learning_rate": 8.563931864498709e-05, |
|
"loss": 1.7232, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.7761128250330542, |
|
"grad_norm": 0.3083755515457062, |
|
"learning_rate": 8.538560923666697e-05, |
|
"loss": 1.5333, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.7783164389598942, |
|
"grad_norm": 0.32344968491148207, |
|
"learning_rate": 8.51319959157036e-05, |
|
"loss": 1.6531, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 1.7805200528867342, |
|
"grad_norm": 0.2628449279485873, |
|
"learning_rate": 8.487848034956527e-05, |
|
"loss": 1.7176, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.7827236668135744, |
|
"grad_norm": 0.30081231536696845, |
|
"learning_rate": 8.462506420507764e-05, |
|
"loss": 1.6087, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 1.7849272807404142, |
|
"grad_norm": 0.24172711814982975, |
|
"learning_rate": 8.437174914841261e-05, |
|
"loss": 1.6365, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7871308946672544, |
|
"grad_norm": 0.3240513311621846, |
|
"learning_rate": 8.411853684507744e-05, |
|
"loss": 1.6818, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 1.7893345085940942, |
|
"grad_norm": 0.26629546007810934, |
|
"learning_rate": 8.38654289599038e-05, |
|
"loss": 1.6165, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.7915381225209344, |
|
"grad_norm": 0.2718697282057903, |
|
"learning_rate": 8.36124271570369e-05, |
|
"loss": 1.7767, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 1.7937417364477743, |
|
"grad_norm": 0.2991997948062704, |
|
"learning_rate": 8.335953309992442e-05, |
|
"loss": 1.6968, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.7959453503746143, |
|
"grad_norm": 0.33154914217795595, |
|
"learning_rate": 8.310674845130563e-05, |
|
"loss": 1.8523, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.7981489643014545, |
|
"grad_norm": 0.29895299240181794, |
|
"learning_rate": 8.285407487320042e-05, |
|
"loss": 1.5945, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.8003525782282943, |
|
"grad_norm": 0.2947016318335874, |
|
"learning_rate": 8.260151402689848e-05, |
|
"loss": 1.625, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 1.8025561921551345, |
|
"grad_norm": 0.3039906812402064, |
|
"learning_rate": 8.234906757294829e-05, |
|
"loss": 1.8956, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.8047598060819743, |
|
"grad_norm": 0.311603713861004, |
|
"learning_rate": 8.209673717114618e-05, |
|
"loss": 1.5808, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 1.8069634200088145, |
|
"grad_norm": 0.3208961582381735, |
|
"learning_rate": 8.184452448052547e-05, |
|
"loss": 1.4928, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.8091670339356545, |
|
"grad_norm": 0.27150517803601, |
|
"learning_rate": 8.15924311593456e-05, |
|
"loss": 1.7155, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.8113706478624945, |
|
"grad_norm": 0.31637619824367874, |
|
"learning_rate": 8.134045886508108e-05, |
|
"loss": 1.4761, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.8135742617893347, |
|
"grad_norm": 0.2536681416637017, |
|
"learning_rate": 8.108860925441076e-05, |
|
"loss": 1.7682, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.8157778757161744, |
|
"grad_norm": 0.2683657294785762, |
|
"learning_rate": 8.083688398320681e-05, |
|
"loss": 1.6091, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.8179814896430146, |
|
"grad_norm": 0.3004847316685122, |
|
"learning_rate": 8.058528470652396e-05, |
|
"loss": 1.7524, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.8201851035698544, |
|
"grad_norm": 0.3033198553489031, |
|
"learning_rate": 8.03338130785885e-05, |
|
"loss": 1.6975, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.8223887174966946, |
|
"grad_norm": 0.2932969746872377, |
|
"learning_rate": 8.008247075278742e-05, |
|
"loss": 1.6345, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.8245923314235346, |
|
"grad_norm": 0.29419726566032006, |
|
"learning_rate": 7.983125938165758e-05, |
|
"loss": 2.0007, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.8267959453503746, |
|
"grad_norm": 0.36994662608619383, |
|
"learning_rate": 7.958018061687494e-05, |
|
"loss": 1.8041, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.8289995592772146, |
|
"grad_norm": 0.3171521170329957, |
|
"learning_rate": 7.932923610924343e-05, |
|
"loss": 1.8268, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.8312031732040546, |
|
"grad_norm": 0.29749910928069495, |
|
"learning_rate": 7.907842750868441e-05, |
|
"loss": 1.7521, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.8334067871308948, |
|
"grad_norm": 0.28978378846287695, |
|
"learning_rate": 7.882775646422547e-05, |
|
"loss": 1.8141, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.8356104010577345, |
|
"grad_norm": 0.32370603539130244, |
|
"learning_rate": 7.857722462399009e-05, |
|
"loss": 1.5852, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.8378140149845748, |
|
"grad_norm": 0.3000864365709902, |
|
"learning_rate": 7.832683363518621e-05, |
|
"loss": 1.5174, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8400176289114147, |
|
"grad_norm": 0.30867413513998754, |
|
"learning_rate": 7.807658514409587e-05, |
|
"loss": 1.7091, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.8422212428382547, |
|
"grad_norm": 0.2941777962805528, |
|
"learning_rate": 7.782648079606412e-05, |
|
"loss": 1.9314, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.8444248567650947, |
|
"grad_norm": 0.26789438468880916, |
|
"learning_rate": 7.757652223548836e-05, |
|
"loss": 1.5959, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.8466284706919347, |
|
"grad_norm": 0.2852613225045537, |
|
"learning_rate": 7.732671110580746e-05, |
|
"loss": 1.4776, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.848832084618775, |
|
"grad_norm": 0.28204667104600045, |
|
"learning_rate": 7.707704904949085e-05, |
|
"loss": 1.6044, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 1.8510356985456147, |
|
"grad_norm": 0.3433461472883869, |
|
"learning_rate": 7.682753770802791e-05, |
|
"loss": 1.8343, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.8532393124724549, |
|
"grad_norm": 0.27355747329642977, |
|
"learning_rate": 7.657817872191713e-05, |
|
"loss": 1.6496, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 1.8554429263992949, |
|
"grad_norm": 0.2860370187153179, |
|
"learning_rate": 7.632897373065522e-05, |
|
"loss": 1.5719, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.8576465403261349, |
|
"grad_norm": 0.2618172945255669, |
|
"learning_rate": 7.607992437272642e-05, |
|
"loss": 1.6911, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 1.8598501542529748, |
|
"grad_norm": 0.2570942578849571, |
|
"learning_rate": 7.583103228559164e-05, |
|
"loss": 1.7205, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.8620537681798148, |
|
"grad_norm": 0.30089829678159197, |
|
"learning_rate": 7.558229910567794e-05, |
|
"loss": 1.7114, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.864257382106655, |
|
"grad_norm": 0.3120407343609943, |
|
"learning_rate": 7.533372646836736e-05, |
|
"loss": 1.6438, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.8664609960334948, |
|
"grad_norm": 0.29920830893784145, |
|
"learning_rate": 7.508531600798657e-05, |
|
"loss": 1.8773, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 1.868664609960335, |
|
"grad_norm": 0.28932016728007587, |
|
"learning_rate": 7.483706935779584e-05, |
|
"loss": 1.5398, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.870868223887175, |
|
"grad_norm": 0.3323660400642421, |
|
"learning_rate": 7.458898814997852e-05, |
|
"loss": 1.8427, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 1.873071837814015, |
|
"grad_norm": 0.24905133812180258, |
|
"learning_rate": 7.434107401563016e-05, |
|
"loss": 1.5713, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.875275451740855, |
|
"grad_norm": 0.3177914709078092, |
|
"learning_rate": 7.409332858474772e-05, |
|
"loss": 1.8163, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 1.877479065667695, |
|
"grad_norm": 0.2731160161178577, |
|
"learning_rate": 7.384575348621909e-05, |
|
"loss": 1.502, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.8796826795945352, |
|
"grad_norm": 0.318150552351912, |
|
"learning_rate": 7.359835034781227e-05, |
|
"loss": 1.709, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 1.881886293521375, |
|
"grad_norm": 0.2992460408245423, |
|
"learning_rate": 7.335112079616456e-05, |
|
"loss": 1.6948, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.8840899074482151, |
|
"grad_norm": 0.26242651145469986, |
|
"learning_rate": 7.31040664567719e-05, |
|
"loss": 1.5288, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.8862935213750551, |
|
"grad_norm": 0.32400746012853343, |
|
"learning_rate": 7.285718895397848e-05, |
|
"loss": 1.7299, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.8884971353018951, |
|
"grad_norm": 0.30579297267422695, |
|
"learning_rate": 7.261048991096558e-05, |
|
"loss": 1.8361, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 1.890700749228735, |
|
"grad_norm": 0.28173595807802665, |
|
"learning_rate": 7.236397094974119e-05, |
|
"loss": 1.737, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.892904363155575, |
|
"grad_norm": 0.3062487579152163, |
|
"learning_rate": 7.211763369112934e-05, |
|
"loss": 1.6612, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 1.8951079770824153, |
|
"grad_norm": 0.29979139920156694, |
|
"learning_rate": 7.18714797547594e-05, |
|
"loss": 1.762, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.897311591009255, |
|
"grad_norm": 0.3021721281737111, |
|
"learning_rate": 7.162551075905538e-05, |
|
"loss": 1.8317, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 1.8995152049360953, |
|
"grad_norm": 0.2809347403792435, |
|
"learning_rate": 7.137972832122532e-05, |
|
"loss": 1.7406, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.9017188188629353, |
|
"grad_norm": 0.31981826220168086, |
|
"learning_rate": 7.113413405725069e-05, |
|
"loss": 1.8273, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 1.9039224327897752, |
|
"grad_norm": 0.27829653919403746, |
|
"learning_rate": 7.088872958187578e-05, |
|
"loss": 1.6196, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.9061260467166152, |
|
"grad_norm": 0.2612389626891207, |
|
"learning_rate": 7.064351650859704e-05, |
|
"loss": 1.7173, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.9083296606434552, |
|
"grad_norm": 0.3351698187645255, |
|
"learning_rate": 7.039849644965246e-05, |
|
"loss": 1.5561, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.9105332745702954, |
|
"grad_norm": 0.276151473793176, |
|
"learning_rate": 7.015367101601091e-05, |
|
"loss": 1.5952, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 1.9127368884971352, |
|
"grad_norm": 0.2866569066490694, |
|
"learning_rate": 6.990904181736187e-05, |
|
"loss": 1.7386, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.9149405024239754, |
|
"grad_norm": 0.30680340268949835, |
|
"learning_rate": 6.96646104621043e-05, |
|
"loss": 1.793, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 1.9171441163508154, |
|
"grad_norm": 0.2853878652168025, |
|
"learning_rate": 6.942037855733661e-05, |
|
"loss": 1.8032, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.9193477302776554, |
|
"grad_norm": 0.3335656030865567, |
|
"learning_rate": 6.917634770884571e-05, |
|
"loss": 1.8019, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 1.9215513442044954, |
|
"grad_norm": 0.26469328077404297, |
|
"learning_rate": 6.893251952109668e-05, |
|
"loss": 1.7769, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.9237549581313353, |
|
"grad_norm": 0.3611778018097071, |
|
"learning_rate": 6.868889559722213e-05, |
|
"loss": 1.845, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 1.9259585720581756, |
|
"grad_norm": 0.2607913972126291, |
|
"learning_rate": 6.84454775390116e-05, |
|
"loss": 1.6583, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.9281621859850153, |
|
"grad_norm": 0.2952126078040435, |
|
"learning_rate": 6.820226694690112e-05, |
|
"loss": 1.4631, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.9303657999118555, |
|
"grad_norm": 0.28021793360716346, |
|
"learning_rate": 6.795926541996273e-05, |
|
"loss": 1.7197, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.9325694138386955, |
|
"grad_norm": 0.2431287772236666, |
|
"learning_rate": 6.771647455589384e-05, |
|
"loss": 1.7528, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 1.9347730277655355, |
|
"grad_norm": 0.24277132942171645, |
|
"learning_rate": 6.74738959510068e-05, |
|
"loss": 1.52, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.9369766416923755, |
|
"grad_norm": 0.2616438959393551, |
|
"learning_rate": 6.723153120021833e-05, |
|
"loss": 1.6386, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 1.9391802556192155, |
|
"grad_norm": 0.3044048108764881, |
|
"learning_rate": 6.698938189703918e-05, |
|
"loss": 1.653, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.9413838695460557, |
|
"grad_norm": 0.2952474841434869, |
|
"learning_rate": 6.674744963356357e-05, |
|
"loss": 1.6325, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 1.9435874834728954, |
|
"grad_norm": 0.3104804592935538, |
|
"learning_rate": 6.65057360004586e-05, |
|
"loss": 1.7827, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9457910973997357, |
|
"grad_norm": 0.27421700814052996, |
|
"learning_rate": 6.626424258695403e-05, |
|
"loss": 1.6614, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 1.9479947113265754, |
|
"grad_norm": 0.28977404672357854, |
|
"learning_rate": 6.60229709808317e-05, |
|
"loss": 1.8225, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.9501983252534156, |
|
"grad_norm": 0.30875392634058607, |
|
"learning_rate": 6.578192276841501e-05, |
|
"loss": 1.7437, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.9524019391802556, |
|
"grad_norm": 0.307292209389746, |
|
"learning_rate": 6.554109953455864e-05, |
|
"loss": 1.7637, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.9546055531070956, |
|
"grad_norm": 0.34674701377289235, |
|
"learning_rate": 6.53005028626381e-05, |
|
"loss": 1.6782, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 1.9568091670339358, |
|
"grad_norm": 0.2997355914742966, |
|
"learning_rate": 6.506013433453926e-05, |
|
"loss": 1.7479, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.9590127809607756, |
|
"grad_norm": 0.33789074614600445, |
|
"learning_rate": 6.4819995530648e-05, |
|
"loss": 1.6811, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 1.9612163948876158, |
|
"grad_norm": 0.3008656237866022, |
|
"learning_rate": 6.45800880298397e-05, |
|
"loss": 1.6704, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9634200088144556, |
|
"grad_norm": 0.31248757394845805, |
|
"learning_rate": 6.434041340946909e-05, |
|
"loss": 1.6695, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 1.9656236227412958, |
|
"grad_norm": 0.3086687965739077, |
|
"learning_rate": 6.41009732453597e-05, |
|
"loss": 1.5949, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.9678272366681357, |
|
"grad_norm": 0.2536971137620243, |
|
"learning_rate": 6.386176911179353e-05, |
|
"loss": 1.6463, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 1.9700308505949757, |
|
"grad_norm": 0.3287438241265034, |
|
"learning_rate": 6.362280258150074e-05, |
|
"loss": 1.6429, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.972234464521816, |
|
"grad_norm": 0.2565655853570647, |
|
"learning_rate": 6.33840752256492e-05, |
|
"loss": 1.5546, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.9744380784486557, |
|
"grad_norm": 0.3055708289742853, |
|
"learning_rate": 6.314558861383442e-05, |
|
"loss": 1.7164, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.976641692375496, |
|
"grad_norm": 0.26286511562137227, |
|
"learning_rate": 6.29073443140689e-05, |
|
"loss": 1.7841, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 1.9788453063023357, |
|
"grad_norm": 0.2842174046049182, |
|
"learning_rate": 6.266934389277204e-05, |
|
"loss": 1.7053, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.981048920229176, |
|
"grad_norm": 0.34820565260475556, |
|
"learning_rate": 6.24315889147597e-05, |
|
"loss": 1.7649, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 1.9832525341560159, |
|
"grad_norm": 0.3290871426082691, |
|
"learning_rate": 6.219408094323415e-05, |
|
"loss": 1.6402, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9854561480828559, |
|
"grad_norm": 0.25604694542787715, |
|
"learning_rate": 6.195682153977351e-05, |
|
"loss": 1.5192, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 1.9876597620096959, |
|
"grad_norm": 0.28478382829773047, |
|
"learning_rate": 6.17198122643216e-05, |
|
"loss": 1.649, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.9898633759365358, |
|
"grad_norm": 0.2776833563766362, |
|
"learning_rate": 6.148305467517768e-05, |
|
"loss": 1.7351, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 1.992066989863376, |
|
"grad_norm": 0.32063858764742265, |
|
"learning_rate": 6.124655032898631e-05, |
|
"loss": 1.8315, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.9942706037902158, |
|
"grad_norm": 0.29572004437320715, |
|
"learning_rate": 6.1010300780726925e-05, |
|
"loss": 1.7337, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.996474217717056, |
|
"grad_norm": 0.3096259639483799, |
|
"learning_rate": 6.077430758370376e-05, |
|
"loss": 1.759, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.998677831643896, |
|
"grad_norm": 0.31442409226477874, |
|
"learning_rate": 6.053857228953546e-05, |
|
"loss": 1.7822, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 2.000881445570736, |
|
"grad_norm": 0.2377883462279695, |
|
"learning_rate": 6.03030964481452e-05, |
|
"loss": 1.4966, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.003085059497576, |
|
"grad_norm": 0.2900063226146335, |
|
"learning_rate": 6.0067881607750134e-05, |
|
"loss": 1.7189, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 2.005288673424416, |
|
"grad_norm": 0.30629874279137115, |
|
"learning_rate": 5.983292931485142e-05, |
|
"loss": 1.5509, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.007492287351256, |
|
"grad_norm": 0.2682166094278232, |
|
"learning_rate": 5.9598241114223986e-05, |
|
"loss": 1.5487, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 2.009695901278096, |
|
"grad_norm": 0.3321377020430881, |
|
"learning_rate": 5.936381854890646e-05, |
|
"loss": 1.8033, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.011899515204936, |
|
"grad_norm": 0.31810161747738896, |
|
"learning_rate": 5.912966316019093e-05, |
|
"loss": 1.621, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 2.014103129131776, |
|
"grad_norm": 0.24729310525461057, |
|
"learning_rate": 5.8895776487612765e-05, |
|
"loss": 1.5993, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.016306743058616, |
|
"grad_norm": 0.30363634336485235, |
|
"learning_rate": 5.8662160068940655e-05, |
|
"loss": 1.6749, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.0185103569854563, |
|
"grad_norm": 0.2851113038101266, |
|
"learning_rate": 5.84288154401664e-05, |
|
"loss": 1.6956, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.020713970912296, |
|
"grad_norm": 0.345208802987356, |
|
"learning_rate": 5.81957441354948e-05, |
|
"loss": 1.5851, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 2.0229175848391363, |
|
"grad_norm": 0.33771374182942, |
|
"learning_rate": 5.796294768733362e-05, |
|
"loss": 1.5908, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.025121198765976, |
|
"grad_norm": 0.29219932563018164, |
|
"learning_rate": 5.773042762628342e-05, |
|
"loss": 1.677, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 2.0273248126928163, |
|
"grad_norm": 0.26977700042523883, |
|
"learning_rate": 5.749818548112762e-05, |
|
"loss": 1.6073, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.029528426619656, |
|
"grad_norm": 0.30654875158910516, |
|
"learning_rate": 5.726622277882243e-05, |
|
"loss": 1.7622, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 2.0317320405464963, |
|
"grad_norm": 0.3217700066222481, |
|
"learning_rate": 5.703454104448665e-05, |
|
"loss": 1.804, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.0339356544733365, |
|
"grad_norm": 0.30632620282091305, |
|
"learning_rate": 5.680314180139178e-05, |
|
"loss": 1.7833, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 2.0361392684001762, |
|
"grad_norm": 0.3023215763344122, |
|
"learning_rate": 5.657202657095206e-05, |
|
"loss": 1.7969, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.0383428823270164, |
|
"grad_norm": 0.3078282400161021, |
|
"learning_rate": 5.6341196872714394e-05, |
|
"loss": 1.6958, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.040546496253856, |
|
"grad_norm": 0.27870283271610047, |
|
"learning_rate": 5.611065422434828e-05, |
|
"loss": 1.5725, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.0427501101806964, |
|
"grad_norm": 0.3114689828172716, |
|
"learning_rate": 5.588040014163585e-05, |
|
"loss": 1.562, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 2.044953724107536, |
|
"grad_norm": 0.2912835457860766, |
|
"learning_rate": 5.565043613846219e-05, |
|
"loss": 1.7486, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.0471573380343764, |
|
"grad_norm": 0.29628653391558857, |
|
"learning_rate": 5.542076372680498e-05, |
|
"loss": 1.7084, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 2.0493609519612166, |
|
"grad_norm": 0.29948576765849355, |
|
"learning_rate": 5.519138441672471e-05, |
|
"loss": 1.6903, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.0515645658880564, |
|
"grad_norm": 0.26477213023267704, |
|
"learning_rate": 5.496229971635487e-05, |
|
"loss": 1.6743, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 2.0537681798148966, |
|
"grad_norm": 0.36060338821204513, |
|
"learning_rate": 5.473351113189194e-05, |
|
"loss": 1.8093, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.0559717937417363, |
|
"grad_norm": 0.31416116481556966, |
|
"learning_rate": 5.4505020167585396e-05, |
|
"loss": 1.6409, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 2.0581754076685765, |
|
"grad_norm": 0.23960293640658495, |
|
"learning_rate": 5.4276828325727934e-05, |
|
"loss": 1.5688, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.0603790215954163, |
|
"grad_norm": 0.33607407453371085, |
|
"learning_rate": 5.4048937106645613e-05, |
|
"loss": 1.5812, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.0625826355222565, |
|
"grad_norm": 0.3362219928372315, |
|
"learning_rate": 5.3821348008687967e-05, |
|
"loss": 1.7184, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.0647862494490967, |
|
"grad_norm": 0.31128427440833256, |
|
"learning_rate": 5.3594062528218025e-05, |
|
"loss": 1.6606, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 2.0669898633759365, |
|
"grad_norm": 0.28720777906212147, |
|
"learning_rate": 5.336708215960258e-05, |
|
"loss": 1.6505, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.0691934773027767, |
|
"grad_norm": 0.35035039806258184, |
|
"learning_rate": 5.314040839520253e-05, |
|
"loss": 1.7716, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 2.0713970912296165, |
|
"grad_norm": 0.2931752659797425, |
|
"learning_rate": 5.291404272536275e-05, |
|
"loss": 1.6877, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0736007051564567, |
|
"grad_norm": 0.2873712201718596, |
|
"learning_rate": 5.268798663840243e-05, |
|
"loss": 1.6062, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 2.0758043190832964, |
|
"grad_norm": 0.26676705722923527, |
|
"learning_rate": 5.2462241620605366e-05, |
|
"loss": 1.6592, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.0780079330101366, |
|
"grad_norm": 0.3509576273254501, |
|
"learning_rate": 5.223680915621014e-05, |
|
"loss": 1.671, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 2.080211546936977, |
|
"grad_norm": 0.39209953129493824, |
|
"learning_rate": 5.2011690727400285e-05, |
|
"loss": 1.6385, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.0824151608638166, |
|
"grad_norm": 0.3402308135526598, |
|
"learning_rate": 5.178688781429455e-05, |
|
"loss": 1.6095, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.084618774790657, |
|
"grad_norm": 0.29891446961728113, |
|
"learning_rate": 5.1562401894937365e-05, |
|
"loss": 1.6653, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.0868223887174966, |
|
"grad_norm": 0.2466567525935987, |
|
"learning_rate": 5.133823444528889e-05, |
|
"loss": 1.6558, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 2.089026002644337, |
|
"grad_norm": 0.3676958248067488, |
|
"learning_rate": 5.111438693921536e-05, |
|
"loss": 1.6279, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.0912296165711766, |
|
"grad_norm": 0.343788030408807, |
|
"learning_rate": 5.089086084847954e-05, |
|
"loss": 1.6951, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 2.0934332304980168, |
|
"grad_norm": 0.3319476261884821, |
|
"learning_rate": 5.066765764273078e-05, |
|
"loss": 1.617, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.095636844424857, |
|
"grad_norm": 0.33586220021618074, |
|
"learning_rate": 5.044477878949571e-05, |
|
"loss": 1.6601, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 2.0978404583516967, |
|
"grad_norm": 0.27982003995931076, |
|
"learning_rate": 5.0222225754168175e-05, |
|
"loss": 1.6063, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.100044072278537, |
|
"grad_norm": 0.3330827204915635, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.7513, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 2.1022476862053767, |
|
"grad_norm": 0.34805614239612326, |
|
"learning_rate": 4.97781029880911e-05, |
|
"loss": 1.5524, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.104451300132217, |
|
"grad_norm": 0.287355630557104, |
|
"learning_rate": 4.955653617737995e-05, |
|
"loss": 1.6138, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.1066549140590567, |
|
"grad_norm": 0.29898227350779466, |
|
"learning_rate": 4.9335301024634094e-05, |
|
"loss": 1.6648, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.108858527985897, |
|
"grad_norm": 0.26174467755819714, |
|
"learning_rate": 4.911439898444036e-05, |
|
"loss": 1.594, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 2.111062141912737, |
|
"grad_norm": 0.35692436121383353, |
|
"learning_rate": 4.889383150919543e-05, |
|
"loss": 1.5403, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.113265755839577, |
|
"grad_norm": 0.2926942813060104, |
|
"learning_rate": 4.867360004909635e-05, |
|
"loss": 1.754, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 2.115469369766417, |
|
"grad_norm": 0.3290438654965286, |
|
"learning_rate": 4.845370605213091e-05, |
|
"loss": 1.5578, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.117672983693257, |
|
"grad_norm": 0.3373040566376742, |
|
"learning_rate": 4.823415096406806e-05, |
|
"loss": 1.6939, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 2.119876597620097, |
|
"grad_norm": 0.34391563105196393, |
|
"learning_rate": 4.801493622844847e-05, |
|
"loss": 1.7067, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.122080211546937, |
|
"grad_norm": 0.3002969326243971, |
|
"learning_rate": 4.779606328657513e-05, |
|
"loss": 1.716, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 2.124283825473777, |
|
"grad_norm": 0.3388857319061881, |
|
"learning_rate": 4.75775335775038e-05, |
|
"loss": 1.843, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.1264874394006172, |
|
"grad_norm": 0.4156596099940998, |
|
"learning_rate": 4.735934853803339e-05, |
|
"loss": 1.7106, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.128691053327457, |
|
"grad_norm": 0.2940085091207592, |
|
"learning_rate": 4.71415096026968e-05, |
|
"loss": 1.6581, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.130894667254297, |
|
"grad_norm": 0.306112268047742, |
|
"learning_rate": 4.692401820375134e-05, |
|
"loss": 1.7315, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 2.133098281181137, |
|
"grad_norm": 0.29996735672531655, |
|
"learning_rate": 4.6706875771169265e-05, |
|
"loss": 1.649, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.135301895107977, |
|
"grad_norm": 0.27463359410451954, |
|
"learning_rate": 4.64900837326284e-05, |
|
"loss": 1.6608, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 2.137505509034817, |
|
"grad_norm": 0.3165826117114464, |
|
"learning_rate": 4.627364351350288e-05, |
|
"loss": 1.6793, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.139709122961657, |
|
"grad_norm": 0.3469777212157657, |
|
"learning_rate": 4.605755653685366e-05, |
|
"loss": 1.7027, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 2.1419127368884974, |
|
"grad_norm": 0.28698621030093346, |
|
"learning_rate": 4.584182422341915e-05, |
|
"loss": 1.6516, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.144116350815337, |
|
"grad_norm": 0.3294649962724426, |
|
"learning_rate": 4.562644799160585e-05, |
|
"loss": 1.7214, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 2.1463199647421773, |
|
"grad_norm": 0.3185351974138239, |
|
"learning_rate": 4.541142925747919e-05, |
|
"loss": 1.6362, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.148523578669017, |
|
"grad_norm": 0.3126109058989699, |
|
"learning_rate": 4.519676943475408e-05, |
|
"loss": 1.7064, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.1507271925958573, |
|
"grad_norm": 0.5380540251828319, |
|
"learning_rate": 4.4982469934785574e-05, |
|
"loss": 1.6943, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.152930806522697, |
|
"grad_norm": 0.3435754281589444, |
|
"learning_rate": 4.4768532166559763e-05, |
|
"loss": 1.715, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 2.1551344204495373, |
|
"grad_norm": 0.28522234926734513, |
|
"learning_rate": 4.455495753668428e-05, |
|
"loss": 1.3429, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.157338034376377, |
|
"grad_norm": 0.29145572840002815, |
|
"learning_rate": 4.4341747449379335e-05, |
|
"loss": 1.5995, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 2.1595416483032173, |
|
"grad_norm": 0.47296843361442037, |
|
"learning_rate": 4.412890330646815e-05, |
|
"loss": 1.8911, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.1617452622300575, |
|
"grad_norm": 0.3101777212408002, |
|
"learning_rate": 4.391642650736811e-05, |
|
"loss": 1.5388, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 2.1639488761568972, |
|
"grad_norm": 0.27664700170021256, |
|
"learning_rate": 4.370431844908119e-05, |
|
"loss": 1.5866, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.1661524900837374, |
|
"grad_norm": 0.29560765708014347, |
|
"learning_rate": 4.349258052618509e-05, |
|
"loss": 1.7198, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 2.168356104010577, |
|
"grad_norm": 0.3363838001346494, |
|
"learning_rate": 4.328121413082388e-05, |
|
"loss": 1.6872, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.1705597179374174, |
|
"grad_norm": 0.2850203542162651, |
|
"learning_rate": 4.307022065269887e-05, |
|
"loss": 1.6207, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.172763331864257, |
|
"grad_norm": 0.32285248455004484, |
|
"learning_rate": 4.285960147905946e-05, |
|
"loss": 1.6117, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.1749669457910974, |
|
"grad_norm": 0.27430213811378995, |
|
"learning_rate": 4.264935799469417e-05, |
|
"loss": 1.6949, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 2.1771705597179376, |
|
"grad_norm": 0.46396848778851696, |
|
"learning_rate": 4.2439491581921373e-05, |
|
"loss": 1.6883, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.1793741736447774, |
|
"grad_norm": 0.31449151532095687, |
|
"learning_rate": 4.223000362058023e-05, |
|
"loss": 1.6213, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 2.1815777875716176, |
|
"grad_norm": 0.27602615724586665, |
|
"learning_rate": 4.202089548802157e-05, |
|
"loss": 1.6365, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.1837814014984573, |
|
"grad_norm": 0.2989766105095882, |
|
"learning_rate": 4.181216855909913e-05, |
|
"loss": 1.6936, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 2.1859850154252976, |
|
"grad_norm": 0.29722828765908, |
|
"learning_rate": 4.16038242061601e-05, |
|
"loss": 1.5737, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.1881886293521373, |
|
"grad_norm": 0.2573098385617781, |
|
"learning_rate": 4.139586379903629e-05, |
|
"loss": 1.6852, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 2.1903922432789775, |
|
"grad_norm": 0.2951816124750441, |
|
"learning_rate": 4.1188288705035226e-05, |
|
"loss": 1.645, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.1925958572058177, |
|
"grad_norm": 0.33288427202987686, |
|
"learning_rate": 4.098110028893105e-05, |
|
"loss": 1.5257, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.1947994711326575, |
|
"grad_norm": 0.3489398250177744, |
|
"learning_rate": 4.077429991295549e-05, |
|
"loss": 1.6671, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.1970030850594977, |
|
"grad_norm": 0.2899374350308383, |
|
"learning_rate": 4.056788893678898e-05, |
|
"loss": 1.6132, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 2.1992066989863375, |
|
"grad_norm": 0.3500814252732699, |
|
"learning_rate": 4.036186871755173e-05, |
|
"loss": 1.5695, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.2014103129131777, |
|
"grad_norm": 0.3687163067932294, |
|
"learning_rate": 4.015624060979486e-05, |
|
"loss": 1.5143, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 2.2036139268400174, |
|
"grad_norm": 0.33423795880289425, |
|
"learning_rate": 3.995100596549128e-05, |
|
"loss": 1.6156, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.2058175407668577, |
|
"grad_norm": 0.2868033623703877, |
|
"learning_rate": 3.9746166134026995e-05, |
|
"loss": 1.5561, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 2.208021154693698, |
|
"grad_norm": 0.34323096599301134, |
|
"learning_rate": 3.9541722462192196e-05, |
|
"loss": 1.4618, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.2102247686205376, |
|
"grad_norm": 0.27958346490895863, |
|
"learning_rate": 3.9337676294172424e-05, |
|
"loss": 1.7082, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 2.212428382547378, |
|
"grad_norm": 0.3257321749343037, |
|
"learning_rate": 3.913402897153957e-05, |
|
"loss": 1.6946, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.2146319964742176, |
|
"grad_norm": 0.3365177337266649, |
|
"learning_rate": 3.893078183324329e-05, |
|
"loss": 1.6428, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.216835610401058, |
|
"grad_norm": 0.34401308894946486, |
|
"learning_rate": 3.8727936215602077e-05, |
|
"loss": 1.5488, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.2190392243278976, |
|
"grad_norm": 0.2845220746832339, |
|
"learning_rate": 3.852549345229445e-05, |
|
"loss": 1.6519, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 2.221242838254738, |
|
"grad_norm": 0.25529471083550676, |
|
"learning_rate": 3.832345487435019e-05, |
|
"loss": 1.8166, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.223446452181578, |
|
"grad_norm": 0.3591343853259783, |
|
"learning_rate": 3.812182181014169e-05, |
|
"loss": 1.7223, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 2.2256500661084178, |
|
"grad_norm": 0.3054726611280714, |
|
"learning_rate": 3.792059558537518e-05, |
|
"loss": 1.8144, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.227853680035258, |
|
"grad_norm": 0.30167111375727146, |
|
"learning_rate": 3.7719777523081864e-05, |
|
"loss": 1.5961, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 2.2300572939620977, |
|
"grad_norm": 0.2916710789608964, |
|
"learning_rate": 3.751936894360949e-05, |
|
"loss": 1.7809, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.232260907888938, |
|
"grad_norm": 0.3732355683913339, |
|
"learning_rate": 3.731937116461336e-05, |
|
"loss": 1.6552, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 2.2344645218157777, |
|
"grad_norm": 0.35016354658091353, |
|
"learning_rate": 3.7119785501047977e-05, |
|
"loss": 1.649, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.236668135742618, |
|
"grad_norm": 0.34880893384696754, |
|
"learning_rate": 3.6920613265158124e-05, |
|
"loss": 1.5914, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.2388717496694577, |
|
"grad_norm": 0.3026190037167945, |
|
"learning_rate": 3.672185576647047e-05, |
|
"loss": 1.5736, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.241075363596298, |
|
"grad_norm": 0.3484793101368692, |
|
"learning_rate": 3.652351431178473e-05, |
|
"loss": 1.7296, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 2.243278977523138, |
|
"grad_norm": 0.2587698411979413, |
|
"learning_rate": 3.6325590205165314e-05, |
|
"loss": 1.7112, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.245482591449978, |
|
"grad_norm": 0.25882732375987005, |
|
"learning_rate": 3.612808474793261e-05, |
|
"loss": 1.7223, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 2.247686205376818, |
|
"grad_norm": 0.27481092620324143, |
|
"learning_rate": 3.593099923865438e-05, |
|
"loss": 1.5473, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.249889819303658, |
|
"grad_norm": 0.3411799767876264, |
|
"learning_rate": 3.573433497313731e-05, |
|
"loss": 1.6459, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 2.252093433230498, |
|
"grad_norm": 0.2887170405054378, |
|
"learning_rate": 3.5538093244418525e-05, |
|
"loss": 1.6195, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.254297047157338, |
|
"grad_norm": 0.3884613083691619, |
|
"learning_rate": 3.5342275342757046e-05, |
|
"loss": 1.8638, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 2.256500661084178, |
|
"grad_norm": 0.28793406477711025, |
|
"learning_rate": 3.5146882555625226e-05, |
|
"loss": 1.5124, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.2587042750110182, |
|
"grad_norm": 0.30122690590343093, |
|
"learning_rate": 3.495191616770034e-05, |
|
"loss": 1.7147, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.260907888937858, |
|
"grad_norm": 0.27840602962105204, |
|
"learning_rate": 3.475737746085631e-05, |
|
"loss": 1.5467, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.263111502864698, |
|
"grad_norm": 0.3165143945785378, |
|
"learning_rate": 3.456326771415498e-05, |
|
"loss": 1.6215, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 2.265315116791538, |
|
"grad_norm": 0.315000953366212, |
|
"learning_rate": 3.436958820383783e-05, |
|
"loss": 1.5548, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.267518730718378, |
|
"grad_norm": 0.29091408470720853, |
|
"learning_rate": 3.417634020331769e-05, |
|
"loss": 1.786, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 2.269722344645218, |
|
"grad_norm": 0.335006684656636, |
|
"learning_rate": 3.398352498317029e-05, |
|
"loss": 1.6015, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.271925958572058, |
|
"grad_norm": 0.3244552469471718, |
|
"learning_rate": 3.379114381112581e-05, |
|
"loss": 1.653, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 2.2741295724988984, |
|
"grad_norm": 0.32402505918566016, |
|
"learning_rate": 3.359919795206065e-05, |
|
"loss": 1.5578, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.276333186425738, |
|
"grad_norm": 0.3417399295841799, |
|
"learning_rate": 3.3407688667989124e-05, |
|
"loss": 1.8143, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 2.2785368003525783, |
|
"grad_norm": 0.3293628623372523, |
|
"learning_rate": 3.321661721805519e-05, |
|
"loss": 1.62, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.280740414279418, |
|
"grad_norm": 0.3594982936202251, |
|
"learning_rate": 3.302598485852401e-05, |
|
"loss": 1.5937, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.2829440282062583, |
|
"grad_norm": 0.2986899594367847, |
|
"learning_rate": 3.283579284277378e-05, |
|
"loss": 1.5761, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.285147642133098, |
|
"grad_norm": 0.2633442789610725, |
|
"learning_rate": 3.2646042421287625e-05, |
|
"loss": 1.7272, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 2.2873512560599383, |
|
"grad_norm": 0.3664081223341677, |
|
"learning_rate": 3.245673484164521e-05, |
|
"loss": 1.6607, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.2895548699867785, |
|
"grad_norm": 0.36095917628329555, |
|
"learning_rate": 3.2267871348514475e-05, |
|
"loss": 1.6644, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 2.2917584839136182, |
|
"grad_norm": 0.3155817104820564, |
|
"learning_rate": 3.207945318364376e-05, |
|
"loss": 1.7833, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.2939620978404585, |
|
"grad_norm": 0.5525894084347752, |
|
"learning_rate": 3.1891481585853224e-05, |
|
"loss": 1.7846, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 2.2961657117672982, |
|
"grad_norm": 0.3183611553729405, |
|
"learning_rate": 3.1703957791027104e-05, |
|
"loss": 1.8015, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.2983693256941384, |
|
"grad_norm": 0.3341156230287187, |
|
"learning_rate": 3.151688303210525e-05, |
|
"loss": 1.4901, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 2.300572939620978, |
|
"grad_norm": 0.29108487309353126, |
|
"learning_rate": 3.133025853907531e-05, |
|
"loss": 1.6021, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.3027765535478184, |
|
"grad_norm": 0.31003713411448125, |
|
"learning_rate": 3.114408553896437e-05, |
|
"loss": 1.6835, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.3049801674746586, |
|
"grad_norm": 0.31639971696298724, |
|
"learning_rate": 3.09583652558311e-05, |
|
"loss": 1.7131, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.3071837814014984, |
|
"grad_norm": 0.320459066996447, |
|
"learning_rate": 3.077309891075766e-05, |
|
"loss": 1.7207, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 2.3093873953283386, |
|
"grad_norm": 0.3658345444037042, |
|
"learning_rate": 3.058828772184155e-05, |
|
"loss": 1.637, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.3115910092551784, |
|
"grad_norm": 0.3119356619189155, |
|
"learning_rate": 3.0403932904187694e-05, |
|
"loss": 1.7374, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 2.3137946231820186, |
|
"grad_norm": 0.34062681606017264, |
|
"learning_rate": 3.0220035669900493e-05, |
|
"loss": 1.3662, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.3159982371088583, |
|
"grad_norm": 0.3231089104173347, |
|
"learning_rate": 3.0036597228075847e-05, |
|
"loss": 1.7862, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 2.3182018510356985, |
|
"grad_norm": 0.2845067164587982, |
|
"learning_rate": 2.985361878479307e-05, |
|
"loss": 1.6374, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.3204054649625387, |
|
"grad_norm": 0.3158916323796938, |
|
"learning_rate": 2.9671101543107037e-05, |
|
"loss": 1.7791, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 2.3226090788893785, |
|
"grad_norm": 0.3473567438345015, |
|
"learning_rate": 2.9489046703040478e-05, |
|
"loss": 1.6438, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.3248126928162187, |
|
"grad_norm": 0.32956604729846517, |
|
"learning_rate": 2.9307455461575728e-05, |
|
"loss": 1.5174, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.3270163067430585, |
|
"grad_norm": 0.31139915651994976, |
|
"learning_rate": 2.9126329012647048e-05, |
|
"loss": 1.6661, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.3292199206698987, |
|
"grad_norm": 0.3916001778917859, |
|
"learning_rate": 2.894566854713283e-05, |
|
"loss": 1.7324, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 2.3314235345967385, |
|
"grad_norm": 0.33538535373990935, |
|
"learning_rate": 2.8765475252847696e-05, |
|
"loss": 1.8397, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.3336271485235787, |
|
"grad_norm": 0.4045876334606375, |
|
"learning_rate": 2.8585750314534633e-05, |
|
"loss": 1.761, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 2.335830762450419, |
|
"grad_norm": 0.34476500843118907, |
|
"learning_rate": 2.8406494913857264e-05, |
|
"loss": 1.7239, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.3380343763772586, |
|
"grad_norm": 0.3377854881404091, |
|
"learning_rate": 2.8227710229392102e-05, |
|
"loss": 1.6767, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 2.340237990304099, |
|
"grad_norm": 0.2943468051471504, |
|
"learning_rate": 2.8049397436620817e-05, |
|
"loss": 1.7027, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.3424416042309386, |
|
"grad_norm": 0.3325646272609782, |
|
"learning_rate": 2.7871557707922356e-05, |
|
"loss": 1.7092, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 2.344645218157779, |
|
"grad_norm": 0.3194408987322575, |
|
"learning_rate": 2.769419221256546e-05, |
|
"loss": 1.5551, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.3468488320846186, |
|
"grad_norm": 0.3154791851100986, |
|
"learning_rate": 2.751730211670075e-05, |
|
"loss": 1.5952, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.349052446011459, |
|
"grad_norm": 0.37152524303673207, |
|
"learning_rate": 2.7340888583353263e-05, |
|
"loss": 1.6328, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.351256059938299, |
|
"grad_norm": 0.364737719498444, |
|
"learning_rate": 2.716495277241463e-05, |
|
"loss": 1.5125, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 2.3534596738651388, |
|
"grad_norm": 0.3524349639748066, |
|
"learning_rate": 2.6989495840635615e-05, |
|
"loss": 1.6589, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.355663287791979, |
|
"grad_norm": 0.27640673317944237, |
|
"learning_rate": 2.6814518941618326e-05, |
|
"loss": 1.5661, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 2.3578669017188187, |
|
"grad_norm": 0.31837314971643976, |
|
"learning_rate": 2.6640023225808852e-05, |
|
"loss": 1.7214, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.360070515645659, |
|
"grad_norm": 0.29946292415188364, |
|
"learning_rate": 2.6466009840489436e-05, |
|
"loss": 1.4745, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 2.3622741295724987, |
|
"grad_norm": 0.3368911389398718, |
|
"learning_rate": 2.629247992977122e-05, |
|
"loss": 1.6371, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.364477743499339, |
|
"grad_norm": 0.3078861715002911, |
|
"learning_rate": 2.6119434634586427e-05, |
|
"loss": 1.6562, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 2.366681357426179, |
|
"grad_norm": 0.39044087680489714, |
|
"learning_rate": 2.5946875092681134e-05, |
|
"loss": 1.7854, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.368884971353019, |
|
"grad_norm": 0.3905475645799491, |
|
"learning_rate": 2.5774802438607627e-05, |
|
"loss": 1.7027, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.371088585279859, |
|
"grad_norm": 0.3344158542218199, |
|
"learning_rate": 2.5603217803716938e-05, |
|
"loss": 1.6856, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.373292199206699, |
|
"grad_norm": 0.3171235587081351, |
|
"learning_rate": 2.5432122316151463e-05, |
|
"loss": 1.6338, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 2.375495813133539, |
|
"grad_norm": 0.31814287079371045, |
|
"learning_rate": 2.5261517100837563e-05, |
|
"loss": 1.6072, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.377699427060379, |
|
"grad_norm": 0.3715801248464969, |
|
"learning_rate": 2.509140327947814e-05, |
|
"loss": 1.7025, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 2.379903040987219, |
|
"grad_norm": 0.30689235196019754, |
|
"learning_rate": 2.4921781970545178e-05, |
|
"loss": 1.704, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.3821066549140593, |
|
"grad_norm": 0.2783210103898173, |
|
"learning_rate": 2.4752654289272568e-05, |
|
"loss": 1.8138, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 2.384310268840899, |
|
"grad_norm": 0.4115102536349836, |
|
"learning_rate": 2.4584021347648645e-05, |
|
"loss": 1.7562, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.3865138827677392, |
|
"grad_norm": 0.320397115810562, |
|
"learning_rate": 2.441588425440886e-05, |
|
"loss": 1.7002, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 2.388717496694579, |
|
"grad_norm": 0.32551710279990625, |
|
"learning_rate": 2.424824411502856e-05, |
|
"loss": 1.6053, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.390921110621419, |
|
"grad_norm": 0.37060732842102345, |
|
"learning_rate": 2.408110203171572e-05, |
|
"loss": 1.6564, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.393124724548259, |
|
"grad_norm": 0.33585884001747096, |
|
"learning_rate": 2.3914459103403696e-05, |
|
"loss": 1.7012, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.395328338475099, |
|
"grad_norm": 0.297871745937653, |
|
"learning_rate": 2.374831642574392e-05, |
|
"loss": 1.7399, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 2.3975319524019394, |
|
"grad_norm": 0.28026347006814506, |
|
"learning_rate": 2.3582675091098717e-05, |
|
"loss": 1.6698, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.399735566328779, |
|
"grad_norm": 0.3092926951118008, |
|
"learning_rate": 2.3417536188534327e-05, |
|
"loss": 1.7019, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 2.4019391802556194, |
|
"grad_norm": 0.29956315407231204, |
|
"learning_rate": 2.3252900803813415e-05, |
|
"loss": 1.7835, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.404142794182459, |
|
"grad_norm": 0.3472861996721381, |
|
"learning_rate": 2.3088770019388116e-05, |
|
"loss": 1.7523, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 2.4063464081092993, |
|
"grad_norm": 0.3346967409970768, |
|
"learning_rate": 2.292514491439297e-05, |
|
"loss": 1.543, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.408550022036139, |
|
"grad_norm": 0.30667047442127315, |
|
"learning_rate": 2.2762026564637717e-05, |
|
"loss": 1.7131, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 2.4107536359629793, |
|
"grad_norm": 0.32172911896887835, |
|
"learning_rate": 2.259941604260024e-05, |
|
"loss": 1.4888, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.4129572498898195, |
|
"grad_norm": 0.33661859838509534, |
|
"learning_rate": 2.2437314417419518e-05, |
|
"loss": 1.6434, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.4151608638166593, |
|
"grad_norm": 0.2766235524609549, |
|
"learning_rate": 2.2275722754888662e-05, |
|
"loss": 1.4497, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.4173644777434995, |
|
"grad_norm": 0.31531488102987404, |
|
"learning_rate": 2.211464211744787e-05, |
|
"loss": 1.7619, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 2.4195680916703393, |
|
"grad_norm": 0.4045821730130733, |
|
"learning_rate": 2.195407356417737e-05, |
|
"loss": 1.5253, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.4217717055971795, |
|
"grad_norm": 0.3077739063590653, |
|
"learning_rate": 2.1794018150790507e-05, |
|
"loss": 1.3737, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 2.4239753195240192, |
|
"grad_norm": 0.26933598773812595, |
|
"learning_rate": 2.1634476929626868e-05, |
|
"loss": 1.5562, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.4261789334508594, |
|
"grad_norm": 0.3180561976663089, |
|
"learning_rate": 2.1475450949645325e-05, |
|
"loss": 1.8415, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 2.4283825473776997, |
|
"grad_norm": 0.326460873305707, |
|
"learning_rate": 2.1316941256417024e-05, |
|
"loss": 1.6886, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.4305861613045394, |
|
"grad_norm": 0.2651305009582311, |
|
"learning_rate": 2.115894889211869e-05, |
|
"loss": 1.556, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 2.4327897752313796, |
|
"grad_norm": 0.285018315967615, |
|
"learning_rate": 2.100147489552562e-05, |
|
"loss": 1.6264, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.4349933891582194, |
|
"grad_norm": 0.3039400323881749, |
|
"learning_rate": 2.084452030200502e-05, |
|
"loss": 1.68, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.4371970030850596, |
|
"grad_norm": 0.4530331306706194, |
|
"learning_rate": 2.068808614350899e-05, |
|
"loss": 1.8822, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.4394006170118994, |
|
"grad_norm": 0.32124417856720616, |
|
"learning_rate": 2.0532173448567936e-05, |
|
"loss": 1.6896, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 2.4416042309387396, |
|
"grad_norm": 0.3526640578616687, |
|
"learning_rate": 2.037678324228366e-05, |
|
"loss": 1.3874, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.44380784486558, |
|
"grad_norm": 0.3025841716329574, |
|
"learning_rate": 2.022191654632274e-05, |
|
"loss": 1.5026, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 2.4460114587924195, |
|
"grad_norm": 0.3451825131993311, |
|
"learning_rate": 2.0067574378909726e-05, |
|
"loss": 1.6466, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.4482150727192598, |
|
"grad_norm": 0.2946116685019073, |
|
"learning_rate": 1.9913757754820483e-05, |
|
"loss": 1.7246, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 2.4504186866460995, |
|
"grad_norm": 0.3604250683804044, |
|
"learning_rate": 1.976046768537544e-05, |
|
"loss": 1.5967, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.4526223005729397, |
|
"grad_norm": 0.356529608888727, |
|
"learning_rate": 1.9607705178433124e-05, |
|
"loss": 1.8344, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 2.4548259144997795, |
|
"grad_norm": 0.3062637993405146, |
|
"learning_rate": 1.9455471238383394e-05, |
|
"loss": 1.727, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.4570295284266197, |
|
"grad_norm": 0.2913261589880019, |
|
"learning_rate": 1.9303766866140794e-05, |
|
"loss": 1.6422, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.45923314235346, |
|
"grad_norm": 0.28539967100565233, |
|
"learning_rate": 1.9152593059138036e-05, |
|
"loss": 1.5191, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.4614367562802997, |
|
"grad_norm": 0.38278133166473177, |
|
"learning_rate": 1.9001950811319624e-05, |
|
"loss": 1.747, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 2.46364037020714, |
|
"grad_norm": 0.3213863995857726, |
|
"learning_rate": 1.885184111313494e-05, |
|
"loss": 1.6493, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.4658439841339796, |
|
"grad_norm": 0.3515687604082846, |
|
"learning_rate": 1.870226495153199e-05, |
|
"loss": 1.6207, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 2.46804759806082, |
|
"grad_norm": 0.2818899743965069, |
|
"learning_rate": 1.8553223309950907e-05, |
|
"loss": 1.5783, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.4702512119876596, |
|
"grad_norm": 0.32339197111018464, |
|
"learning_rate": 1.8404717168317444e-05, |
|
"loss": 1.6023, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 2.4724548259145, |
|
"grad_norm": 0.32235437648153625, |
|
"learning_rate": 1.8256747503036465e-05, |
|
"loss": 1.5901, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.47465843984134, |
|
"grad_norm": 0.2996816683579224, |
|
"learning_rate": 1.8109315286985575e-05, |
|
"loss": 1.7065, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 2.47686205376818, |
|
"grad_norm": 0.34309522162413447, |
|
"learning_rate": 1.7962421489508797e-05, |
|
"loss": 1.7226, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.47906566769502, |
|
"grad_norm": 0.29849313259138993, |
|
"learning_rate": 1.7816067076410138e-05, |
|
"loss": 1.7579, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.48126928162186, |
|
"grad_norm": 0.3368071136503966, |
|
"learning_rate": 1.7670253009947146e-05, |
|
"loss": 1.5962, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.4834728955487, |
|
"grad_norm": 0.3175198598873115, |
|
"learning_rate": 1.7524980248824806e-05, |
|
"loss": 1.7556, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 2.4856765094755398, |
|
"grad_norm": 0.30889458937514946, |
|
"learning_rate": 1.738024974818896e-05, |
|
"loss": 1.7268, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.48788012340238, |
|
"grad_norm": 0.30454000273861387, |
|
"learning_rate": 1.7236062459620306e-05, |
|
"loss": 1.6084, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 2.49008373732922, |
|
"grad_norm": 0.30973625136809374, |
|
"learning_rate": 1.7092419331127894e-05, |
|
"loss": 1.581, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.49228735125606, |
|
"grad_norm": 0.3312428081002817, |
|
"learning_rate": 1.6949321307143096e-05, |
|
"loss": 1.6826, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 2.4944909651829, |
|
"grad_norm": 0.29438754562772956, |
|
"learning_rate": 1.6806769328513226e-05, |
|
"loss": 1.6531, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.49669457910974, |
|
"grad_norm": 0.3072379816792179, |
|
"learning_rate": 1.666476433249552e-05, |
|
"loss": 1.6036, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 2.49889819303658, |
|
"grad_norm": 0.2653188527319854, |
|
"learning_rate": 1.6523307252750787e-05, |
|
"loss": 1.6677, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.50110180696342, |
|
"grad_norm": 0.31853001670175285, |
|
"learning_rate": 1.6382399019337493e-05, |
|
"loss": 1.7512, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.50330542089026, |
|
"grad_norm": 0.3541014940940505, |
|
"learning_rate": 1.6242040558705386e-05, |
|
"loss": 1.4784, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.5055090348171003, |
|
"grad_norm": 0.3022737220008577, |
|
"learning_rate": 1.6102232793689652e-05, |
|
"loss": 1.552, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 2.50771264874394, |
|
"grad_norm": 0.2865580516371412, |
|
"learning_rate": 1.5962976643504734e-05, |
|
"loss": 1.6162, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.50991626267078, |
|
"grad_norm": 0.33229646654342737, |
|
"learning_rate": 1.5824273023738223e-05, |
|
"loss": 1.7025, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 2.51211987659762, |
|
"grad_norm": 0.25315774233489335, |
|
"learning_rate": 1.5686122846344932e-05, |
|
"loss": 1.6556, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.5143234905244602, |
|
"grad_norm": 0.32926703632524384, |
|
"learning_rate": 1.55485270196409e-05, |
|
"loss": 1.7055, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 2.5165271044513, |
|
"grad_norm": 0.38254621782613324, |
|
"learning_rate": 1.541148644829743e-05, |
|
"loss": 1.8189, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.51873071837814, |
|
"grad_norm": 0.3262138673835723, |
|
"learning_rate": 1.5275002033335016e-05, |
|
"loss": 1.6328, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 2.5209343323049804, |
|
"grad_norm": 0.36898284620242594, |
|
"learning_rate": 1.5139074672117514e-05, |
|
"loss": 1.7229, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.52313794623182, |
|
"grad_norm": 0.29383774220673775, |
|
"learning_rate": 1.500370525834639e-05, |
|
"loss": 1.7057, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.52534156015866, |
|
"grad_norm": 0.35255942435868104, |
|
"learning_rate": 1.4868894682054535e-05, |
|
"loss": 1.703, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.5275451740855, |
|
"grad_norm": 0.3224975998851544, |
|
"learning_rate": 1.473464382960057e-05, |
|
"loss": 1.6255, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 2.5297487880123404, |
|
"grad_norm": 0.2728863606551014, |
|
"learning_rate": 1.4600953583663114e-05, |
|
"loss": 1.5348, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.53195240193918, |
|
"grad_norm": 0.3292693963632781, |
|
"learning_rate": 1.4467824823234843e-05, |
|
"loss": 1.6536, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 2.5341560158660204, |
|
"grad_norm": 0.3530164649944846, |
|
"learning_rate": 1.4335258423616737e-05, |
|
"loss": 1.631, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.5363596297928606, |
|
"grad_norm": 0.3107181811257503, |
|
"learning_rate": 1.4203255256412318e-05, |
|
"loss": 1.5969, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 2.5385632437197003, |
|
"grad_norm": 0.3700803583018722, |
|
"learning_rate": 1.407181618952199e-05, |
|
"loss": 1.7883, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.54076685764654, |
|
"grad_norm": 0.30243816752307084, |
|
"learning_rate": 1.394094208713732e-05, |
|
"loss": 1.652, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 2.5429704715733803, |
|
"grad_norm": 0.3413616757398393, |
|
"learning_rate": 1.3810633809735196e-05, |
|
"loss": 1.7507, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.5451740855002205, |
|
"grad_norm": 0.3120087119800215, |
|
"learning_rate": 1.3680892214072405e-05, |
|
"loss": 1.7198, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.5473776994270603, |
|
"grad_norm": 0.3518504932831635, |
|
"learning_rate": 1.3551718153179871e-05, |
|
"loss": 1.8579, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.5495813133539005, |
|
"grad_norm": 0.34752155952619, |
|
"learning_rate": 1.3423112476357036e-05, |
|
"loss": 1.5468, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 2.5517849272807407, |
|
"grad_norm": 0.3260308226099618, |
|
"learning_rate": 1.3295076029166265e-05, |
|
"loss": 1.5258, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.5539885412075805, |
|
"grad_norm": 0.35584838362058835, |
|
"learning_rate": 1.3167609653427426e-05, |
|
"loss": 1.7373, |
|
"step": 5795 |
|
}, |
|
{ |
|
"epoch": 2.55619215513442, |
|
"grad_norm": 0.32184918024972864, |
|
"learning_rate": 1.304071418721221e-05, |
|
"loss": 1.6741, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.5583957690612604, |
|
"grad_norm": 0.315994062331127, |
|
"learning_rate": 1.2914390464838655e-05, |
|
"loss": 1.7156, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 2.5605993829881006, |
|
"grad_norm": 0.23924524979523612, |
|
"learning_rate": 1.2788639316865635e-05, |
|
"loss": 1.738, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.5628029969149404, |
|
"grad_norm": 0.3098121698841723, |
|
"learning_rate": 1.266346157008753e-05, |
|
"loss": 1.5198, |
|
"step": 5815 |
|
}, |
|
{ |
|
"epoch": 2.5650066108417806, |
|
"grad_norm": 0.3262487762949137, |
|
"learning_rate": 1.2538858047528646e-05, |
|
"loss": 1.587, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.5672102247686204, |
|
"grad_norm": 0.33166973277913664, |
|
"learning_rate": 1.2414829568437825e-05, |
|
"loss": 1.5043, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.5694138386954606, |
|
"grad_norm": 0.3022890966007592, |
|
"learning_rate": 1.2291376948283139e-05, |
|
"loss": 1.605, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.5716174526223003, |
|
"grad_norm": 0.3389630036906691, |
|
"learning_rate": 1.2168500998746435e-05, |
|
"loss": 1.6955, |
|
"step": 5835 |
|
}, |
|
{ |
|
"epoch": 2.5738210665491406, |
|
"grad_norm": 0.3796380833169501, |
|
"learning_rate": 1.2046202527718076e-05, |
|
"loss": 1.6275, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.5760246804759808, |
|
"grad_norm": 0.3238245323234113, |
|
"learning_rate": 1.1924482339291554e-05, |
|
"loss": 1.7289, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 2.5782282944028205, |
|
"grad_norm": 0.30218042381327415, |
|
"learning_rate": 1.1803341233758291e-05, |
|
"loss": 1.5412, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.5804319083296607, |
|
"grad_norm": 0.33206646260037787, |
|
"learning_rate": 1.1682780007602268e-05, |
|
"loss": 1.8011, |
|
"step": 5855 |
|
}, |
|
{ |
|
"epoch": 2.5826355222565005, |
|
"grad_norm": 0.3538395424898534, |
|
"learning_rate": 1.1562799453494899e-05, |
|
"loss": 1.7862, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.5848391361833407, |
|
"grad_norm": 0.35528233062631853, |
|
"learning_rate": 1.144340036028978e-05, |
|
"loss": 1.685, |
|
"step": 5865 |
|
}, |
|
{ |
|
"epoch": 2.5870427501101805, |
|
"grad_norm": 0.33412660972869573, |
|
"learning_rate": 1.132458351301744e-05, |
|
"loss": 1.7575, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.5892463640370207, |
|
"grad_norm": 0.3175660326892495, |
|
"learning_rate": 1.1206349692880236e-05, |
|
"loss": 1.5658, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.591449977963861, |
|
"grad_norm": 0.30007433696171515, |
|
"learning_rate": 1.1088699677247238e-05, |
|
"loss": 1.586, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.5936535918907007, |
|
"grad_norm": 0.3180669505120074, |
|
"learning_rate": 1.097163423964912e-05, |
|
"loss": 1.7819, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 2.595857205817541, |
|
"grad_norm": 0.25188231699393177, |
|
"learning_rate": 1.0855154149772994e-05, |
|
"loss": 1.512, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.5980608197443806, |
|
"grad_norm": 0.2725825366458783, |
|
"learning_rate": 1.0739260173457355e-05, |
|
"loss": 1.7237, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 2.600264433671221, |
|
"grad_norm": 0.4353012547028696, |
|
"learning_rate": 1.0623953072687265e-05, |
|
"loss": 1.5664, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.6024680475980606, |
|
"grad_norm": 0.3208919789812601, |
|
"learning_rate": 1.0509233605588997e-05, |
|
"loss": 1.7184, |
|
"step": 5905 |
|
}, |
|
{ |
|
"epoch": 2.604671661524901, |
|
"grad_norm": 0.2811764278977997, |
|
"learning_rate": 1.0395102526425282e-05, |
|
"loss": 1.6933, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.606875275451741, |
|
"grad_norm": 0.3283193021777617, |
|
"learning_rate": 1.0281560585590311e-05, |
|
"loss": 1.709, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 2.609078889378581, |
|
"grad_norm": 0.31601773043700576, |
|
"learning_rate": 1.0168608529604783e-05, |
|
"loss": 1.5517, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.611282503305421, |
|
"grad_norm": 0.26388766629051863, |
|
"learning_rate": 1.0056247101110972e-05, |
|
"loss": 1.5716, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.6134861172322608, |
|
"grad_norm": 0.3170004934043696, |
|
"learning_rate": 9.944477038867838e-06, |
|
"loss": 1.3933, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.615689731159101, |
|
"grad_norm": 0.3026100677449506, |
|
"learning_rate": 9.833299077746261e-06, |
|
"loss": 1.538, |
|
"step": 5935 |
|
}, |
|
{ |
|
"epoch": 2.6178933450859407, |
|
"grad_norm": 0.32599901382842245, |
|
"learning_rate": 9.72271394872416e-06, |
|
"loss": 1.722, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.620096959012781, |
|
"grad_norm": 0.31252235277224677, |
|
"learning_rate": 9.612722378881578e-06, |
|
"loss": 1.5777, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 2.622300572939621, |
|
"grad_norm": 0.27949230064797415, |
|
"learning_rate": 9.503325091396098e-06, |
|
"loss": 1.7781, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.624504186866461, |
|
"grad_norm": 0.3031767326868999, |
|
"learning_rate": 9.394522805537931e-06, |
|
"loss": 1.6123, |
|
"step": 5955 |
|
}, |
|
{ |
|
"epoch": 2.626707800793301, |
|
"grad_norm": 0.3596489809565846, |
|
"learning_rate": 9.286316236665271e-06, |
|
"loss": 1.8234, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.628911414720141, |
|
"grad_norm": 0.37067976899261396, |
|
"learning_rate": 9.178706096219547e-06, |
|
"loss": 1.5176, |
|
"step": 5965 |
|
}, |
|
{ |
|
"epoch": 2.631115028646981, |
|
"grad_norm": 0.30187017500054564, |
|
"learning_rate": 9.0716930917208e-06, |
|
"loss": 1.5401, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.633318642573821, |
|
"grad_norm": 0.338495742270805, |
|
"learning_rate": 8.965277926762916e-06, |
|
"loss": 1.5802, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.635522256500661, |
|
"grad_norm": 0.33527970219407616, |
|
"learning_rate": 8.859461301009186e-06, |
|
"loss": 1.6115, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.6377258704275013, |
|
"grad_norm": 0.3284526374151953, |
|
"learning_rate": 8.754243910187498e-06, |
|
"loss": 1.7051, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 2.639929484354341, |
|
"grad_norm": 0.2820385948307506, |
|
"learning_rate": 8.649626446085945e-06, |
|
"loss": 1.4949, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.6421330982811813, |
|
"grad_norm": 0.3357888909663005, |
|
"learning_rate": 8.545609596548121e-06, |
|
"loss": 1.5265, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 2.644336712208021, |
|
"grad_norm": 0.24829105724152342, |
|
"learning_rate": 8.442194045468733e-06, |
|
"loss": 1.4418, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.6465403261348612, |
|
"grad_norm": 0.3445954692738449, |
|
"learning_rate": 8.339380472789016e-06, |
|
"loss": 1.471, |
|
"step": 6005 |
|
}, |
|
{ |
|
"epoch": 2.648743940061701, |
|
"grad_norm": 0.34193200011238856, |
|
"learning_rate": 8.237169554492297e-06, |
|
"loss": 1.4714, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.650947553988541, |
|
"grad_norm": 0.2994838498414857, |
|
"learning_rate": 8.135561962599514e-06, |
|
"loss": 1.5747, |
|
"step": 6015 |
|
}, |
|
{ |
|
"epoch": 2.6531511679153814, |
|
"grad_norm": 0.2510845629901009, |
|
"learning_rate": 8.034558365164868e-06, |
|
"loss": 1.6476, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.655354781842221, |
|
"grad_norm": 0.3375793364015892, |
|
"learning_rate": 7.934159426271403e-06, |
|
"loss": 1.6063, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.6575583957690614, |
|
"grad_norm": 0.33573003764748477, |
|
"learning_rate": 7.834365806026578e-06, |
|
"loss": 1.5814, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.659762009695901, |
|
"grad_norm": 0.38295818181236296, |
|
"learning_rate": 7.735178160557943e-06, |
|
"loss": 1.7642, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 2.6619656236227414, |
|
"grad_norm": 0.28003247319118707, |
|
"learning_rate": 7.636597142009017e-06, |
|
"loss": 1.7946, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.664169237549581, |
|
"grad_norm": 0.367119348898622, |
|
"learning_rate": 7.538623398534661e-06, |
|
"loss": 1.5553, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 2.6663728514764213, |
|
"grad_norm": 0.30087617582351794, |
|
"learning_rate": 7.441257574297089e-06, |
|
"loss": 1.56, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.6685764654032615, |
|
"grad_norm": 0.2679029740855203, |
|
"learning_rate": 7.344500309461511e-06, |
|
"loss": 1.5622, |
|
"step": 6055 |
|
}, |
|
{ |
|
"epoch": 2.6707800793301013, |
|
"grad_norm": 0.38643212881302264, |
|
"learning_rate": 7.248352240192002e-06, |
|
"loss": 1.6344, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.6729836932569415, |
|
"grad_norm": 0.3036596598041859, |
|
"learning_rate": 7.15281399864719e-06, |
|
"loss": 1.6126, |
|
"step": 6065 |
|
}, |
|
{ |
|
"epoch": 2.6751873071837813, |
|
"grad_norm": 0.3409000984647797, |
|
"learning_rate": 7.057886212976239e-06, |
|
"loss": 1.7453, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.6773909211106215, |
|
"grad_norm": 0.3123434626912612, |
|
"learning_rate": 6.963569507314627e-06, |
|
"loss": 1.6624, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.6795945350374613, |
|
"grad_norm": 0.3272418793360404, |
|
"learning_rate": 6.8698645017801325e-06, |
|
"loss": 1.8614, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.6817981489643015, |
|
"grad_norm": 0.2432770598979605, |
|
"learning_rate": 6.776771812468618e-06, |
|
"loss": 1.6761, |
|
"step": 6085 |
|
}, |
|
{ |
|
"epoch": 2.6840017628911417, |
|
"grad_norm": 0.29638907037530365, |
|
"learning_rate": 6.684292051450147e-06, |
|
"loss": 1.5734, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.6862053768179814, |
|
"grad_norm": 0.2927874947198909, |
|
"learning_rate": 6.592425826764781e-06, |
|
"loss": 1.6527, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 2.6884089907448216, |
|
"grad_norm": 0.3262506822619773, |
|
"learning_rate": 6.501173742418753e-06, |
|
"loss": 1.7488, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.6906126046716614, |
|
"grad_norm": 0.29388683856815984, |
|
"learning_rate": 6.410536398380385e-06, |
|
"loss": 1.7391, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 2.6928162185985016, |
|
"grad_norm": 0.42564932659573224, |
|
"learning_rate": 6.320514390576193e-06, |
|
"loss": 1.5618, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.6950198325253414, |
|
"grad_norm": 0.33120895230745345, |
|
"learning_rate": 6.231108310886924e-06, |
|
"loss": 1.5172, |
|
"step": 6115 |
|
}, |
|
{ |
|
"epoch": 2.6972234464521816, |
|
"grad_norm": 0.2970159546645706, |
|
"learning_rate": 6.142318747143716e-06, |
|
"loss": 1.5319, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.699427060379022, |
|
"grad_norm": 0.3125793002150719, |
|
"learning_rate": 6.054146283124218e-06, |
|
"loss": 1.6401, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.7016306743058616, |
|
"grad_norm": 0.30146872233206856, |
|
"learning_rate": 5.966591498548724e-06, |
|
"loss": 1.7384, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.7038342882327018, |
|
"grad_norm": 0.3887993733574576, |
|
"learning_rate": 5.8796549690763645e-06, |
|
"loss": 1.8019, |
|
"step": 6135 |
|
}, |
|
{ |
|
"epoch": 2.7060379021595415, |
|
"grad_norm": 0.3033022818548619, |
|
"learning_rate": 5.79333726630138e-06, |
|
"loss": 1.5844, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.7082415160863818, |
|
"grad_norm": 0.368025241100897, |
|
"learning_rate": 5.7076389577493175e-06, |
|
"loss": 1.8454, |
|
"step": 6145 |
|
}, |
|
{ |
|
"epoch": 2.7104451300132215, |
|
"grad_norm": 0.298047160015892, |
|
"learning_rate": 5.622560606873262e-06, |
|
"loss": 1.6045, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.7126487439400617, |
|
"grad_norm": 0.30605420571316905, |
|
"learning_rate": 5.538102773050235e-06, |
|
"loss": 1.696, |
|
"step": 6155 |
|
}, |
|
{ |
|
"epoch": 2.714852357866902, |
|
"grad_norm": 0.3454395184873584, |
|
"learning_rate": 5.454266011577369e-06, |
|
"loss": 1.6258, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.7170559717937417, |
|
"grad_norm": 0.30863792610727064, |
|
"learning_rate": 5.371050873668437e-06, |
|
"loss": 1.5895, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 2.719259585720582, |
|
"grad_norm": 0.32671612304128317, |
|
"learning_rate": 5.2884579064500615e-06, |
|
"loss": 1.751, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.7214631996474217, |
|
"grad_norm": 0.24404249683796003, |
|
"learning_rate": 5.206487652958214e-06, |
|
"loss": 1.5318, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.723666813574262, |
|
"grad_norm": 0.31345216172176077, |
|
"learning_rate": 5.125140652134652e-06, |
|
"loss": 1.6814, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.7258704275011016, |
|
"grad_norm": 0.3301202829233535, |
|
"learning_rate": 5.044417438823279e-06, |
|
"loss": 1.6688, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 2.728074041427942, |
|
"grad_norm": 0.3180153311679256, |
|
"learning_rate": 4.964318543766733e-06, |
|
"loss": 1.8152, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.730277655354782, |
|
"grad_norm": 0.3477053424267945, |
|
"learning_rate": 4.884844493602847e-06, |
|
"loss": 1.6068, |
|
"step": 6195 |
|
}, |
|
{ |
|
"epoch": 2.732481269281622, |
|
"grad_norm": 0.3829450747175287, |
|
"learning_rate": 4.805995810861219e-06, |
|
"loss": 1.5436, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.734684883208462, |
|
"grad_norm": 0.35144134365971347, |
|
"learning_rate": 4.727773013959702e-06, |
|
"loss": 1.7733, |
|
"step": 6205 |
|
}, |
|
{ |
|
"epoch": 2.736888497135302, |
|
"grad_norm": 0.3511048030633929, |
|
"learning_rate": 4.650176617201074e-06, |
|
"loss": 1.7483, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.739092111062142, |
|
"grad_norm": 0.3146009823620477, |
|
"learning_rate": 4.573207130769663e-06, |
|
"loss": 1.6416, |
|
"step": 6215 |
|
}, |
|
{ |
|
"epoch": 2.7412957249889818, |
|
"grad_norm": 0.2961123189548949, |
|
"learning_rate": 4.496865060727917e-06, |
|
"loss": 1.5871, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.743499338915822, |
|
"grad_norm": 0.2827248539547275, |
|
"learning_rate": 4.421150909013094e-06, |
|
"loss": 1.6537, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.745702952842662, |
|
"grad_norm": 0.3230949462447901, |
|
"learning_rate": 4.346065173434055e-06, |
|
"loss": 1.5128, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.747906566769502, |
|
"grad_norm": 0.27203611650594783, |
|
"learning_rate": 4.271608347667888e-06, |
|
"loss": 1.6916, |
|
"step": 6235 |
|
}, |
|
{ |
|
"epoch": 2.750110180696342, |
|
"grad_norm": 0.316718312569841, |
|
"learning_rate": 4.197780921256678e-06, |
|
"loss": 1.7967, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.752313794623182, |
|
"grad_norm": 0.31564921665656825, |
|
"learning_rate": 4.1245833796043184e-06, |
|
"loss": 1.5092, |
|
"step": 6245 |
|
}, |
|
{ |
|
"epoch": 2.754517408550022, |
|
"grad_norm": 0.3023941557144956, |
|
"learning_rate": 4.052016203973319e-06, |
|
"loss": 1.6864, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.756721022476862, |
|
"grad_norm": 0.31013607515444674, |
|
"learning_rate": 3.9800798714816566e-06, |
|
"loss": 1.7096, |
|
"step": 6255 |
|
}, |
|
{ |
|
"epoch": 2.758924636403702, |
|
"grad_norm": 0.34766875505420175, |
|
"learning_rate": 3.908774855099529e-06, |
|
"loss": 1.6837, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.7611282503305423, |
|
"grad_norm": 0.2943152497920515, |
|
"learning_rate": 3.838101623646429e-06, |
|
"loss": 1.6478, |
|
"step": 6265 |
|
}, |
|
{ |
|
"epoch": 2.763331864257382, |
|
"grad_norm": 0.32690567367741863, |
|
"learning_rate": 3.768060641787874e-06, |
|
"loss": 1.8321, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.765535478184222, |
|
"grad_norm": 0.35174727110880194, |
|
"learning_rate": 3.698652370032496e-06, |
|
"loss": 1.7583, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.767739092111062, |
|
"grad_norm": 0.30775438684619605, |
|
"learning_rate": 3.6298772647289204e-06, |
|
"loss": 1.7887, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.7699427060379023, |
|
"grad_norm": 0.2974347187890948, |
|
"learning_rate": 3.561735778062847e-06, |
|
"loss": 1.5669, |
|
"step": 6285 |
|
}, |
|
{ |
|
"epoch": 2.772146319964742, |
|
"grad_norm": 0.32631762135645986, |
|
"learning_rate": 3.4942283580539747e-06, |
|
"loss": 1.5496, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.7743499338915822, |
|
"grad_norm": 0.3078445012000501, |
|
"learning_rate": 3.427355448553149e-06, |
|
"loss": 1.4473, |
|
"step": 6295 |
|
}, |
|
{ |
|
"epoch": 2.7765535478184225, |
|
"grad_norm": 0.31874633296757016, |
|
"learning_rate": 3.3611174892393848e-06, |
|
"loss": 1.7297, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.778757161745262, |
|
"grad_norm": 0.3877251343286691, |
|
"learning_rate": 3.2955149156170373e-06, |
|
"loss": 1.7889, |
|
"step": 6305 |
|
}, |
|
{ |
|
"epoch": 2.780960775672102, |
|
"grad_norm": 0.3050428005282347, |
|
"learning_rate": 3.230548159012836e-06, |
|
"loss": 1.7297, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.783164389598942, |
|
"grad_norm": 0.3297939270880444, |
|
"learning_rate": 3.1662176465731776e-06, |
|
"loss": 1.7542, |
|
"step": 6315 |
|
}, |
|
{ |
|
"epoch": 2.7853680035257824, |
|
"grad_norm": 0.35972869738440505, |
|
"learning_rate": 3.1025238012612146e-06, |
|
"loss": 1.6169, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.787571617452622, |
|
"grad_norm": 0.3384226865100565, |
|
"learning_rate": 3.039467041854105e-06, |
|
"loss": 1.6362, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.7897752313794624, |
|
"grad_norm": 0.3670760451609635, |
|
"learning_rate": 2.97704778294029e-06, |
|
"loss": 1.6398, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.7919788453063026, |
|
"grad_norm": 0.2854714227749342, |
|
"learning_rate": 2.9152664349167415e-06, |
|
"loss": 1.4325, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 2.7941824592331423, |
|
"grad_norm": 0.3042468904319354, |
|
"learning_rate": 2.854123403986253e-06, |
|
"loss": 1.6423, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.796386073159982, |
|
"grad_norm": 0.33954395775276786, |
|
"learning_rate": 2.793619092154787e-06, |
|
"loss": 1.6785, |
|
"step": 6345 |
|
}, |
|
{ |
|
"epoch": 2.7985896870868223, |
|
"grad_norm": 0.41052304755954266, |
|
"learning_rate": 2.7337538972287967e-06, |
|
"loss": 1.7808, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.8007933010136625, |
|
"grad_norm": 0.2735021459128279, |
|
"learning_rate": 2.674528212812721e-06, |
|
"loss": 1.579, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 2.8029969149405023, |
|
"grad_norm": 0.42116340427498267, |
|
"learning_rate": 2.6159424283062507e-06, |
|
"loss": 1.665, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.8052005288673425, |
|
"grad_norm": 0.40395544915333126, |
|
"learning_rate": 2.557996928901829e-06, |
|
"loss": 1.6685, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 2.8074041427941827, |
|
"grad_norm": 0.41455342226161546, |
|
"learning_rate": 2.5006920955821465e-06, |
|
"loss": 1.7578, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.8096077567210225, |
|
"grad_norm": 0.32321328330549354, |
|
"learning_rate": 2.4440283051176405e-06, |
|
"loss": 1.7026, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.8118113706478622, |
|
"grad_norm": 0.2956835138059502, |
|
"learning_rate": 2.388005930063941e-06, |
|
"loss": 1.8632, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.8140149845747024, |
|
"grad_norm": 0.3268794222372435, |
|
"learning_rate": 2.3326253387594753e-06, |
|
"loss": 1.6233, |
|
"step": 6385 |
|
}, |
|
{ |
|
"epoch": 2.8162185985015427, |
|
"grad_norm": 0.3626797930342101, |
|
"learning_rate": 2.277886895323078e-06, |
|
"loss": 1.74, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.8184222124283824, |
|
"grad_norm": 0.33358450270104334, |
|
"learning_rate": 2.2237909596515396e-06, |
|
"loss": 1.4655, |
|
"step": 6395 |
|
}, |
|
{ |
|
"epoch": 2.8206258263552226, |
|
"grad_norm": 0.29838126441173135, |
|
"learning_rate": 2.1703378874172507e-06, |
|
"loss": 1.4969, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.822829440282063, |
|
"grad_norm": 0.34601838203569607, |
|
"learning_rate": 2.117528030065907e-06, |
|
"loss": 1.6886, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 2.8250330542089026, |
|
"grad_norm": 0.3110562652655748, |
|
"learning_rate": 2.0653617348141084e-06, |
|
"loss": 1.4905, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.8272366681357424, |
|
"grad_norm": 0.3274079081069346, |
|
"learning_rate": 2.013839344647217e-06, |
|
"loss": 1.6808, |
|
"step": 6415 |
|
}, |
|
{ |
|
"epoch": 2.8294402820625826, |
|
"grad_norm": 0.34133335820544286, |
|
"learning_rate": 1.962961198316937e-06, |
|
"loss": 1.7414, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.831643895989423, |
|
"grad_norm": 0.3234135423197038, |
|
"learning_rate": 1.912727630339217e-06, |
|
"loss": 1.4927, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.8338475099162626, |
|
"grad_norm": 0.31099973650003665, |
|
"learning_rate": 1.8631389709919843e-06, |
|
"loss": 1.5605, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.8360511238431028, |
|
"grad_norm": 0.37303627703284004, |
|
"learning_rate": 1.8141955463129912e-06, |
|
"loss": 1.6712, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 2.838254737769943, |
|
"grad_norm": 0.3614598619091711, |
|
"learning_rate": 1.7658976780976944e-06, |
|
"loss": 1.7914, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.8404583516967827, |
|
"grad_norm": 0.30969623455642703, |
|
"learning_rate": 1.7182456838971016e-06, |
|
"loss": 1.5793, |
|
"step": 6445 |
|
}, |
|
{ |
|
"epoch": 2.8426619656236225, |
|
"grad_norm": 0.26680083024728835, |
|
"learning_rate": 1.6712398770156734e-06, |
|
"loss": 1.5423, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.8448655795504627, |
|
"grad_norm": 0.38620184585663864, |
|
"learning_rate": 1.6248805665093348e-06, |
|
"loss": 1.7361, |
|
"step": 6455 |
|
}, |
|
{ |
|
"epoch": 2.847069193477303, |
|
"grad_norm": 0.29245866381267194, |
|
"learning_rate": 1.5791680571833667e-06, |
|
"loss": 1.4591, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.8492728074041427, |
|
"grad_norm": 0.36925991583350115, |
|
"learning_rate": 1.5341026495904409e-06, |
|
"loss": 1.5466, |
|
"step": 6465 |
|
}, |
|
{ |
|
"epoch": 2.851476421330983, |
|
"grad_norm": 0.31621129108601936, |
|
"learning_rate": 1.4896846400286323e-06, |
|
"loss": 1.5198, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.853680035257823, |
|
"grad_norm": 0.3918256317744239, |
|
"learning_rate": 1.4459143205394876e-06, |
|
"loss": 1.8413, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.855883649184663, |
|
"grad_norm": 0.3333872096143664, |
|
"learning_rate": 1.4027919789060818e-06, |
|
"loss": 1.6091, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.8580872631115026, |
|
"grad_norm": 0.350383265332482, |
|
"learning_rate": 1.36031789865112e-06, |
|
"loss": 1.77, |
|
"step": 6485 |
|
}, |
|
{ |
|
"epoch": 2.860290877038343, |
|
"grad_norm": 0.32052781126282354, |
|
"learning_rate": 1.3184923590351062e-06, |
|
"loss": 1.6178, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.862494490965183, |
|
"grad_norm": 0.33800257995746813, |
|
"learning_rate": 1.27731563505451e-06, |
|
"loss": 1.6759, |
|
"step": 6495 |
|
}, |
|
{ |
|
"epoch": 2.864698104892023, |
|
"grad_norm": 0.32607540150243636, |
|
"learning_rate": 1.236787997439892e-06, |
|
"loss": 1.5576, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.866901718818863, |
|
"grad_norm": 0.32271754224482374, |
|
"learning_rate": 1.196909712654204e-06, |
|
"loss": 1.5769, |
|
"step": 6505 |
|
}, |
|
{ |
|
"epoch": 2.8691053327457032, |
|
"grad_norm": 0.32279106125213675, |
|
"learning_rate": 1.1576810428910012e-06, |
|
"loss": 1.4904, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.871308946672543, |
|
"grad_norm": 0.33356280248337183, |
|
"learning_rate": 1.1191022460727007e-06, |
|
"loss": 1.5742, |
|
"step": 6515 |
|
}, |
|
{ |
|
"epoch": 2.8735125605993828, |
|
"grad_norm": 0.3151589561132343, |
|
"learning_rate": 1.0811735758489372e-06, |
|
"loss": 1.6439, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.875716174526223, |
|
"grad_norm": 0.3317166675779296, |
|
"learning_rate": 1.04389528159482e-06, |
|
"loss": 1.4983, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.877919788453063, |
|
"grad_norm": 0.3198594908928924, |
|
"learning_rate": 1.0072676084093902e-06, |
|
"loss": 1.6749, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.880123402379903, |
|
"grad_norm": 0.2937946173545102, |
|
"learning_rate": 9.712907971139218e-07, |
|
"loss": 1.7593, |
|
"step": 6535 |
|
}, |
|
{ |
|
"epoch": 2.882327016306743, |
|
"grad_norm": 0.3493222702605276, |
|
"learning_rate": 9.359650842503565e-07, |
|
"loss": 1.737, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.884530630233583, |
|
"grad_norm": 0.3038129813169421, |
|
"learning_rate": 9.012907020798156e-07, |
|
"loss": 1.6078, |
|
"step": 6545 |
|
}, |
|
{ |
|
"epoch": 2.886734244160423, |
|
"grad_norm": 0.3031344362125413, |
|
"learning_rate": 8.672678785809796e-07, |
|
"loss": 1.6788, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.888937858087263, |
|
"grad_norm": 0.31295231167033, |
|
"learning_rate": 8.338968374486555e-07, |
|
"loss": 1.734, |
|
"step": 6555 |
|
}, |
|
{ |
|
"epoch": 2.891141472014103, |
|
"grad_norm": 0.30464503391760195, |
|
"learning_rate": 8.011777980922564e-07, |
|
"loss": 1.6216, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.8933450859409433, |
|
"grad_norm": 0.31057203787361704, |
|
"learning_rate": 7.691109756344128e-07, |
|
"loss": 1.6683, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 2.895548699867783, |
|
"grad_norm": 0.3406431085406218, |
|
"learning_rate": 7.376965809095193e-07, |
|
"loss": 1.7457, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.8977523137946233, |
|
"grad_norm": 0.26328954802317356, |
|
"learning_rate": 7.06934820462346e-07, |
|
"loss": 1.6027, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.899955927721463, |
|
"grad_norm": 0.3427927943612674, |
|
"learning_rate": 6.768258965467289e-07, |
|
"loss": 1.7368, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.9021595416483033, |
|
"grad_norm": 0.3099689467854988, |
|
"learning_rate": 6.473700071241484e-07, |
|
"loss": 1.7899, |
|
"step": 6585 |
|
}, |
|
{ |
|
"epoch": 2.904363155575143, |
|
"grad_norm": 0.38502367800150844, |
|
"learning_rate": 6.185673458625418e-07, |
|
"loss": 1.732, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.9065667695019832, |
|
"grad_norm": 0.3501339480817073, |
|
"learning_rate": 5.904181021349375e-07, |
|
"loss": 1.6615, |
|
"step": 6595 |
|
}, |
|
{ |
|
"epoch": 2.9087703834288234, |
|
"grad_norm": 0.28836814250088966, |
|
"learning_rate": 5.629224610182671e-07, |
|
"loss": 1.5576, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.910973997355663, |
|
"grad_norm": 0.2999170211226527, |
|
"learning_rate": 5.360806032920995e-07, |
|
"loss": 1.6333, |
|
"step": 6605 |
|
}, |
|
{ |
|
"epoch": 2.9131776112825034, |
|
"grad_norm": 0.3369079119860257, |
|
"learning_rate": 5.09892705437498e-07, |
|
"loss": 1.4815, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.915381225209343, |
|
"grad_norm": 0.30167843329688654, |
|
"learning_rate": 4.843589396358427e-07, |
|
"loss": 1.7719, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 2.9175848391361834, |
|
"grad_norm": 0.3352146963211379, |
|
"learning_rate": 4.5947947376767663e-07, |
|
"loss": 1.8039, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.919788453063023, |
|
"grad_norm": 0.31153697039563194, |
|
"learning_rate": 4.3525447141165023e-07, |
|
"loss": 1.4281, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.9219920669898634, |
|
"grad_norm": 0.3021129933679619, |
|
"learning_rate": 4.116840918434006e-07, |
|
"loss": 1.7845, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.9241956809167036, |
|
"grad_norm": 0.3038448101025625, |
|
"learning_rate": 3.887684900345301e-07, |
|
"loss": 1.6785, |
|
"step": 6635 |
|
}, |
|
{ |
|
"epoch": 2.9263992948435433, |
|
"grad_norm": 0.29571689111577726, |
|
"learning_rate": 3.665078166515623e-07, |
|
"loss": 1.5903, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.9286029087703835, |
|
"grad_norm": 0.3420405614774203, |
|
"learning_rate": 3.449022180549766e-07, |
|
"loss": 1.7721, |
|
"step": 6645 |
|
}, |
|
{ |
|
"epoch": 2.9308065226972233, |
|
"grad_norm": 0.28585767479477975, |
|
"learning_rate": 3.2395183629824186e-07, |
|
"loss": 1.6843, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.9330101366240635, |
|
"grad_norm": 0.2974354225013896, |
|
"learning_rate": 3.0365680912688434e-07, |
|
"loss": 1.557, |
|
"step": 6655 |
|
}, |
|
{ |
|
"epoch": 2.9352137505509033, |
|
"grad_norm": 0.3294613096996469, |
|
"learning_rate": 2.840172699775656e-07, |
|
"loss": 1.5854, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.9374173644777435, |
|
"grad_norm": 0.2754088803888805, |
|
"learning_rate": 2.650333479771949e-07, |
|
"loss": 1.5721, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 2.9396209784045837, |
|
"grad_norm": 0.2983838481391387, |
|
"learning_rate": 2.467051679421406e-07, |
|
"loss": 1.6993, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.9418245923314235, |
|
"grad_norm": 0.31155141036980233, |
|
"learning_rate": 2.290328503773309e-07, |
|
"loss": 1.687, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.9440282062582637, |
|
"grad_norm": 0.29451801544591716, |
|
"learning_rate": 2.1201651147554347e-07, |
|
"loss": 1.7157, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.9462318201851034, |
|
"grad_norm": 0.3316660290657273, |
|
"learning_rate": 1.956562631165504e-07, |
|
"loss": 1.6958, |
|
"step": 6685 |
|
}, |
|
{ |
|
"epoch": 2.9484354341119436, |
|
"grad_norm": 0.37499704382786475, |
|
"learning_rate": 1.7995221286645215e-07, |
|
"loss": 1.7061, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.9506390480387834, |
|
"grad_norm": 0.21043643461218126, |
|
"learning_rate": 1.6490446397696702e-07, |
|
"loss": 1.3901, |
|
"step": 6695 |
|
}, |
|
{ |
|
"epoch": 2.9528426619656236, |
|
"grad_norm": 0.3260933032444566, |
|
"learning_rate": 1.5051311538469837e-07, |
|
"loss": 1.6567, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.955046275892464, |
|
"grad_norm": 0.3164050584909926, |
|
"learning_rate": 1.367782617105351e-07, |
|
"loss": 1.6661, |
|
"step": 6705 |
|
}, |
|
{ |
|
"epoch": 2.9572498898193036, |
|
"grad_norm": 0.3495591454896855, |
|
"learning_rate": 1.2369999325901881e-07, |
|
"loss": 1.6197, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.959453503746144, |
|
"grad_norm": 0.3351019168716464, |
|
"learning_rate": 1.1127839601774437e-07, |
|
"loss": 1.6162, |
|
"step": 6715 |
|
}, |
|
{ |
|
"epoch": 2.9616571176729836, |
|
"grad_norm": 0.34641497402663024, |
|
"learning_rate": 9.951355165678244e-08, |
|
"loss": 1.7908, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.9638607315998238, |
|
"grad_norm": 0.37716614486780947, |
|
"learning_rate": 8.840553752815783e-08, |
|
"loss": 1.6302, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.9660643455266635, |
|
"grad_norm": 0.3039829114023131, |
|
"learning_rate": 7.79544266653609e-08, |
|
"loss": 1.7006, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.9682679594535037, |
|
"grad_norm": 0.31609676417425303, |
|
"learning_rate": 6.816028778281469e-08, |
|
"loss": 1.6702, |
|
"step": 6735 |
|
}, |
|
{ |
|
"epoch": 2.970471573380344, |
|
"grad_norm": 0.37777667052819425, |
|
"learning_rate": 5.902318527547523e-08, |
|
"loss": 1.4444, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.9726751873071837, |
|
"grad_norm": 0.36388069806115425, |
|
"learning_rate": 5.0543179218365265e-08, |
|
"loss": 1.5438, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 2.974878801234024, |
|
"grad_norm": 0.362171321908763, |
|
"learning_rate": 4.272032536621895e-08, |
|
"loss": 1.7638, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.9770824151608637, |
|
"grad_norm": 0.3560152063618098, |
|
"learning_rate": 3.5554675153082195e-08, |
|
"loss": 1.6422, |
|
"step": 6755 |
|
}, |
|
{ |
|
"epoch": 2.979286029087704, |
|
"grad_norm": 0.2934857454303967, |
|
"learning_rate": 2.9046275692012904e-08, |
|
"loss": 1.5529, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.9814896430145437, |
|
"grad_norm": 0.31059161987377404, |
|
"learning_rate": 2.3195169774714586e-08, |
|
"loss": 1.5975, |
|
"step": 6765 |
|
}, |
|
{ |
|
"epoch": 2.983693256941384, |
|
"grad_norm": 0.4364323493515609, |
|
"learning_rate": 1.8001395871303228e-08, |
|
"loss": 1.7395, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.985896870868224, |
|
"grad_norm": 0.3199314191111005, |
|
"learning_rate": 1.3464988130051925e-08, |
|
"loss": 1.5351, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.988100484795064, |
|
"grad_norm": 0.2946914049828253, |
|
"learning_rate": 9.585976377124439e-09, |
|
"loss": 1.519, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.990304098721904, |
|
"grad_norm": 0.2758574818813751, |
|
"learning_rate": 6.364386116419762e-09, |
|
"loss": 1.4758, |
|
"step": 6785 |
|
}, |
|
{ |
|
"epoch": 2.992507712648744, |
|
"grad_norm": 0.329124521705272, |
|
"learning_rate": 3.800238529416688e-09, |
|
"loss": 1.8557, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.994711326575584, |
|
"grad_norm": 0.3394680059091016, |
|
"learning_rate": 1.8935504749628684e-09, |
|
"loss": 1.7226, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 2.996914940502424, |
|
"grad_norm": 0.27640804395611795, |
|
"learning_rate": 6.443344892637093e-10, |
|
"loss": 1.6186, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.999118554429264, |
|
"grad_norm": 0.3138822790633703, |
|
"learning_rate": 5.259878569363608e-11, |
|
"loss": 1.6958, |
|
"step": 6805 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6807, |
|
"total_flos": 1.620091928969216e+16, |
|
"train_loss": 1.72514724640379, |
|
"train_runtime": 38823.0414, |
|
"train_samples_per_second": 0.701, |
|
"train_steps_per_second": 0.175 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 6807, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.620091928969216e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
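
The object above is a Hugging Face Transformers `trainer_state.json`: `log_history` holds one record per `logging_steps` (5) optimizer steps, and the final record carries the run summary (`train_loss`, `train_runtime`, throughput). Below is a minimal sketch of how such a file might be inspected; it assumes the state was saved as `trainer_state.json` in the working directory (the path is an assumption, not part of the log) and relies only on the Python standard library. The field names (`step`, `loss`, `train_loss`) are taken from the records above.

import json

# Load the trainer state (the file path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry a "loss" key; the trailing summary record does not,
# so filtering on it separates the step logs from the run summary.
history = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

print(f"logged records: {len(history)}")
print(f"first logged loss (step {steps[0]}): {losses[0]:.4f}")
print(f"last logged loss (step {steps[-1]}): {losses[-1]:.4f}")

# The final record reports the mean training loss over the whole run.
print("reported train_loss:", state["log_history"][-1].get("train_loss"))

With `logging_steps` of 5 and `max_steps` of 6807, this yields on the order of 1,360 per-step records, ending near the 1.6-1.7 loss range visible in the later entries and a reported mean `train_loss` of about 1.725.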