|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 95491, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001047219109654313, |
|
"grad_norm": 6.445349216461182, |
|
"learning_rate": 5.238344683080147e-08, |
|
"loss": 1.2293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002094438219308626, |
|
"grad_norm": 7.579877853393555, |
|
"learning_rate": 1.0476689366160294e-07, |
|
"loss": 1.2053, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.003141657328962939, |
|
"grad_norm": 5.277140140533447, |
|
"learning_rate": 1.5715034049240438e-07, |
|
"loss": 1.1086, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004188876438617252, |
|
"grad_norm": 3.0632076263427734, |
|
"learning_rate": 2.0953378732320588e-07, |
|
"loss": 1.0615, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.005236095548271565, |
|
"grad_norm": 8.091245651245117, |
|
"learning_rate": 2.6191723415400735e-07, |
|
"loss": 0.9659, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.006283314657925878, |
|
"grad_norm": 2.5814743041992188, |
|
"learning_rate": 3.1430068098480877e-07, |
|
"loss": 0.9656, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.007330533767580191, |
|
"grad_norm": 8.025248527526855, |
|
"learning_rate": 3.6668412781561024e-07, |
|
"loss": 0.9068, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.008377752877234504, |
|
"grad_norm": 2.8023085594177246, |
|
"learning_rate": 4.1906757464641176e-07, |
|
"loss": 0.8278, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.009424971986888816, |
|
"grad_norm": 2.9815306663513184, |
|
"learning_rate": 4.714510214772132e-07, |
|
"loss": 0.8097, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01047219109654313, |
|
"grad_norm": 4.450624465942383, |
|
"learning_rate": 5.238344683080147e-07, |
|
"loss": 0.8611, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.011519410206197442, |
|
"grad_norm": 2.9705615043640137, |
|
"learning_rate": 5.762179151388162e-07, |
|
"loss": 0.8217, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.012566629315851756, |
|
"grad_norm": 5.060612678527832, |
|
"learning_rate": 6.286013619696175e-07, |
|
"loss": 0.8326, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01361384842550607, |
|
"grad_norm": 4.002683639526367, |
|
"learning_rate": 6.809848088004191e-07, |
|
"loss": 0.7742, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.014661067535160381, |
|
"grad_norm": 3.3899588584899902, |
|
"learning_rate": 7.333682556312205e-07, |
|
"loss": 0.7594, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.015708286644814693, |
|
"grad_norm": 4.091441631317139, |
|
"learning_rate": 7.857517024620219e-07, |
|
"loss": 0.7871, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01675550575446901, |
|
"grad_norm": 3.302689790725708, |
|
"learning_rate": 8.381351492928235e-07, |
|
"loss": 0.721, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01780272486412332, |
|
"grad_norm": 3.8457956314086914, |
|
"learning_rate": 8.905185961236249e-07, |
|
"loss": 0.6789, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.018849943973777632, |
|
"grad_norm": 3.763422727584839, |
|
"learning_rate": 9.429020429544264e-07, |
|
"loss": 0.7035, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.019897163083431948, |
|
"grad_norm": 2.3855648040771484, |
|
"learning_rate": 9.95285489785228e-07, |
|
"loss": 0.7331, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02094438219308626, |
|
"grad_norm": 3.0932857990264893, |
|
"learning_rate": 9.999976668774249e-07, |
|
"loss": 0.7123, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02199160130274057, |
|
"grad_norm": 2.939152956008911, |
|
"learning_rate": 9.999897217221058e-07, |
|
"loss": 0.6106, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.023038820412394884, |
|
"grad_norm": 2.148160934448242, |
|
"learning_rate": 9.999761418022958e-07, |
|
"loss": 0.6828, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.0240860395220492, |
|
"grad_norm": 2.302873134613037, |
|
"learning_rate": 9.999569272710377e-07, |
|
"loss": 0.6691, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02513325863170351, |
|
"grad_norm": 4.346377372741699, |
|
"learning_rate": 9.999320783448744e-07, |
|
"loss": 0.6698, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.026180477741357823, |
|
"grad_norm": 2.157055616378784, |
|
"learning_rate": 9.999015953038474e-07, |
|
"loss": 0.6019, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02722769685101214, |
|
"grad_norm": 2.7303714752197266, |
|
"learning_rate": 9.998654784914935e-07, |
|
"loss": 0.5972, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02827491596066645, |
|
"grad_norm": 4.359681606292725, |
|
"learning_rate": 9.9982372831484e-07, |
|
"loss": 0.6381, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.029322135070320762, |
|
"grad_norm": 3.2993288040161133, |
|
"learning_rate": 9.997763452444018e-07, |
|
"loss": 0.6093, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.030369354179975078, |
|
"grad_norm": 3.061521053314209, |
|
"learning_rate": 9.99723329814175e-07, |
|
"loss": 0.6875, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.031416573289629386, |
|
"grad_norm": 2.3765642642974854, |
|
"learning_rate": 9.996646826216302e-07, |
|
"loss": 0.6031, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0324637923992837, |
|
"grad_norm": 2.144615411758423, |
|
"learning_rate": 9.996004043277078e-07, |
|
"loss": 0.637, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.03351101150893802, |
|
"grad_norm": 3.2836430072784424, |
|
"learning_rate": 9.995304956568083e-07, |
|
"loss": 0.6425, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.034558230618592325, |
|
"grad_norm": 2.8710663318634033, |
|
"learning_rate": 9.99454957396786e-07, |
|
"loss": 0.6199, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03560544972824664, |
|
"grad_norm": 2.5998404026031494, |
|
"learning_rate": 9.993737903989387e-07, |
|
"loss": 0.5903, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.036652668837900956, |
|
"grad_norm": 2.677945613861084, |
|
"learning_rate": 9.992869955779995e-07, |
|
"loss": 0.6473, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.037699887947555265, |
|
"grad_norm": 3.9936769008636475, |
|
"learning_rate": 9.991945739121251e-07, |
|
"loss": 0.5847, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.03874710705720958, |
|
"grad_norm": 2.839268207550049, |
|
"learning_rate": 9.990965264428851e-07, |
|
"loss": 0.5893, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.039794326166863896, |
|
"grad_norm": 2.4763646125793457, |
|
"learning_rate": 9.989928542752516e-07, |
|
"loss": 0.5865, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.040841545276518204, |
|
"grad_norm": 4.822995662689209, |
|
"learning_rate": 9.98883558577585e-07, |
|
"loss": 0.579, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.04188876438617252, |
|
"grad_norm": 2.6188089847564697, |
|
"learning_rate": 9.987686405816216e-07, |
|
"loss": 0.6065, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.042935983495826835, |
|
"grad_norm": 2.550874710083008, |
|
"learning_rate": 9.986481015824592e-07, |
|
"loss": 0.5911, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.04398320260548114, |
|
"grad_norm": 2.973268985748291, |
|
"learning_rate": 9.985219429385443e-07, |
|
"loss": 0.6216, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.04503042171513546, |
|
"grad_norm": 6.536316394805908, |
|
"learning_rate": 9.98390166071654e-07, |
|
"loss": 0.5904, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.04607764082478977, |
|
"grad_norm": 2.6079025268554688, |
|
"learning_rate": 9.982527724668825e-07, |
|
"loss": 0.5942, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.04712485993444408, |
|
"grad_norm": 2.2787749767303467, |
|
"learning_rate": 9.981097636726227e-07, |
|
"loss": 0.6174, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0481720790440984, |
|
"grad_norm": 1.995902419090271, |
|
"learning_rate": 9.979611413005493e-07, |
|
"loss": 0.5698, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.04921929815375271, |
|
"grad_norm": 3.4670004844665527, |
|
"learning_rate": 9.97806907025601e-07, |
|
"loss": 0.5871, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.05026651726340702, |
|
"grad_norm": 2.329735279083252, |
|
"learning_rate": 9.97647062585961e-07, |
|
"loss": 0.6061, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.05131373637306134, |
|
"grad_norm": 2.4299092292785645, |
|
"learning_rate": 9.97481609783038e-07, |
|
"loss": 0.5944, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.052360955482715646, |
|
"grad_norm": 4.186954498291016, |
|
"learning_rate": 9.973105504814458e-07, |
|
"loss": 0.6131, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05340817459236996, |
|
"grad_norm": 2.038557767868042, |
|
"learning_rate": 9.971338866089812e-07, |
|
"loss": 0.5668, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.05445539370202428, |
|
"grad_norm": 2.6505930423736572, |
|
"learning_rate": 9.96951620156604e-07, |
|
"loss": 0.5697, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.055502612811678585, |
|
"grad_norm": 3.494474411010742, |
|
"learning_rate": 9.967637531784138e-07, |
|
"loss": 0.6061, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.0565498319213329, |
|
"grad_norm": 1.573089599609375, |
|
"learning_rate": 9.965702877916262e-07, |
|
"loss": 0.5714, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.057597051030987216, |
|
"grad_norm": 3.103743553161621, |
|
"learning_rate": 9.963712261765495e-07, |
|
"loss": 0.6045, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.058644270140641525, |
|
"grad_norm": 2.182767152786255, |
|
"learning_rate": 9.96166570576561e-07, |
|
"loss": 0.6209, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.05969148925029584, |
|
"grad_norm": 2.818512439727783, |
|
"learning_rate": 9.959563232980801e-07, |
|
"loss": 0.5825, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.060738708359950155, |
|
"grad_norm": 6.24643611907959, |
|
"learning_rate": 9.957404867105435e-07, |
|
"loss": 0.5645, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.061785927469604464, |
|
"grad_norm": 2.866800308227539, |
|
"learning_rate": 9.955190632463774e-07, |
|
"loss": 0.5826, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.06283314657925877, |
|
"grad_norm": 1.9323431253433228, |
|
"learning_rate": 9.952920554009715e-07, |
|
"loss": 0.5706, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06388036568891309, |
|
"grad_norm": 2.389801263809204, |
|
"learning_rate": 9.9505946573265e-07, |
|
"loss": 0.5888, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.0649275847985674, |
|
"grad_norm": 2.6937005519866943, |
|
"learning_rate": 9.948212968626429e-07, |
|
"loss": 0.5848, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.06597480390822172, |
|
"grad_norm": 3.2649362087249756, |
|
"learning_rate": 9.945775514750558e-07, |
|
"loss": 0.5746, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.06702202301787603, |
|
"grad_norm": 3.9703376293182373, |
|
"learning_rate": 9.943282323168416e-07, |
|
"loss": 0.5219, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.06806924212753035, |
|
"grad_norm": 3.0078823566436768, |
|
"learning_rate": 9.94073342197767e-07, |
|
"loss": 0.5867, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.06911646123718465, |
|
"grad_norm": 2.0793182849884033, |
|
"learning_rate": 9.938128839903829e-07, |
|
"loss": 0.5757, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.07016368034683897, |
|
"grad_norm": 1.7143627405166626, |
|
"learning_rate": 9.935468606299908e-07, |
|
"loss": 0.5753, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.07121089945649328, |
|
"grad_norm": 1.6375339031219482, |
|
"learning_rate": 9.932752751146102e-07, |
|
"loss": 0.5875, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.0722581185661476, |
|
"grad_norm": 3.0804569721221924, |
|
"learning_rate": 9.929981305049452e-07, |
|
"loss": 0.5399, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.07330533767580191, |
|
"grad_norm": 1.8709744215011597, |
|
"learning_rate": 9.92715429924349e-07, |
|
"loss": 0.5555, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.07435255678545621, |
|
"grad_norm": 2.213629722595215, |
|
"learning_rate": 9.924271765587897e-07, |
|
"loss": 0.5536, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.07539977589511053, |
|
"grad_norm": 1.5812900066375732, |
|
"learning_rate": 9.921333736568133e-07, |
|
"loss": 0.5973, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.07644699500476484, |
|
"grad_norm": 1.2580069303512573, |
|
"learning_rate": 9.918340245295086e-07, |
|
"loss": 0.549, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.07749421411441916, |
|
"grad_norm": 3.4917242527008057, |
|
"learning_rate": 9.915291325504685e-07, |
|
"loss": 0.5493, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.07854143322407348, |
|
"grad_norm": 3.6106157302856445, |
|
"learning_rate": 9.912187011557523e-07, |
|
"loss": 0.5367, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.07958865233372779, |
|
"grad_norm": 2.585413694381714, |
|
"learning_rate": 9.90902733843848e-07, |
|
"loss": 0.5242, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.08063587144338209, |
|
"grad_norm": 2.1417288780212402, |
|
"learning_rate": 9.905812341756314e-07, |
|
"loss": 0.5657, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.08168309055303641, |
|
"grad_norm": 2.6701626777648926, |
|
"learning_rate": 9.902542057743267e-07, |
|
"loss": 0.533, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.08273030966269072, |
|
"grad_norm": 2.7961204051971436, |
|
"learning_rate": 9.899216523254657e-07, |
|
"loss": 0.5833, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.08377752877234504, |
|
"grad_norm": 3.9673585891723633, |
|
"learning_rate": 9.895835775768464e-07, |
|
"loss": 0.5548, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.08482474788199935, |
|
"grad_norm": 2.384716272354126, |
|
"learning_rate": 9.892399853384903e-07, |
|
"loss": 0.5802, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.08587196699165367, |
|
"grad_norm": 2.7740979194641113, |
|
"learning_rate": 9.888908794825994e-07, |
|
"loss": 0.5565, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.08691918610130797, |
|
"grad_norm": 2.4571990966796875, |
|
"learning_rate": 9.885362639435133e-07, |
|
"loss": 0.5538, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.08796640521096229, |
|
"grad_norm": 2.063465118408203, |
|
"learning_rate": 9.88176142717664e-07, |
|
"loss": 0.603, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.0890136243206166, |
|
"grad_norm": 1.9801498651504517, |
|
"learning_rate": 9.878105198635321e-07, |
|
"loss": 0.5479, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09006084343027092, |
|
"grad_norm": 2.044619083404541, |
|
"learning_rate": 9.87439399501599e-07, |
|
"loss": 0.5446, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.09110806253992523, |
|
"grad_norm": 2.573242664337158, |
|
"learning_rate": 9.87062785814303e-07, |
|
"loss": 0.5347, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.09215528164957953, |
|
"grad_norm": 2.520949125289917, |
|
"learning_rate": 9.866806830459898e-07, |
|
"loss": 0.5467, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.09320250075923385, |
|
"grad_norm": 2.924830913543701, |
|
"learning_rate": 9.86293095502866e-07, |
|
"loss": 0.5187, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.09424971986888817, |
|
"grad_norm": 2.2049362659454346, |
|
"learning_rate": 9.859000275529507e-07, |
|
"loss": 0.5549, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.09529693897854248, |
|
"grad_norm": 2.932223320007324, |
|
"learning_rate": 9.855014836260256e-07, |
|
"loss": 0.5723, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.0963441580881968, |
|
"grad_norm": 2.659306526184082, |
|
"learning_rate": 9.850974682135855e-07, |
|
"loss": 0.5471, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.09739137719785111, |
|
"grad_norm": 3.1078333854675293, |
|
"learning_rate": 9.84687985868787e-07, |
|
"loss": 0.5498, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.09843859630750541, |
|
"grad_norm": 2.73991322517395, |
|
"learning_rate": 9.842730412063984e-07, |
|
"loss": 0.5509, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.09948581541715973, |
|
"grad_norm": 2.288360595703125, |
|
"learning_rate": 9.83852638902747e-07, |
|
"loss": 0.5311, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.10053303452681404, |
|
"grad_norm": 2.391042947769165, |
|
"learning_rate": 9.834267836956652e-07, |
|
"loss": 0.569, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.10158025363646836, |
|
"grad_norm": 2.225496292114258, |
|
"learning_rate": 9.829954803844404e-07, |
|
"loss": 0.5432, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.10262747274612267, |
|
"grad_norm": 1.877164363861084, |
|
"learning_rate": 9.82558733829757e-07, |
|
"loss": 0.5795, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.10367469185577699, |
|
"grad_norm": 2.455549478530884, |
|
"learning_rate": 9.82116548953644e-07, |
|
"loss": 0.577, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.10472191096543129, |
|
"grad_norm": 3.1859889030456543, |
|
"learning_rate": 9.816689307394198e-07, |
|
"loss": 0.5742, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.10576913007508561, |
|
"grad_norm": 2.9405317306518555, |
|
"learning_rate": 9.812158842316341e-07, |
|
"loss": 0.5674, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.10681634918473992, |
|
"grad_norm": 2.1740851402282715, |
|
"learning_rate": 9.807574145360125e-07, |
|
"loss": 0.5219, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.10786356829439424, |
|
"grad_norm": 2.1551525592803955, |
|
"learning_rate": 9.80293526819399e-07, |
|
"loss": 0.5378, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.10891078740404855, |
|
"grad_norm": 1.479442834854126, |
|
"learning_rate": 9.798242263096968e-07, |
|
"loss": 0.5137, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.10995800651370287, |
|
"grad_norm": 2.2272469997406006, |
|
"learning_rate": 9.793495182958107e-07, |
|
"loss": 0.5469, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11100522562335717, |
|
"grad_norm": 1.9610800743103027, |
|
"learning_rate": 9.78869408127586e-07, |
|
"loss": 0.5685, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.11205244473301149, |
|
"grad_norm": 2.2086081504821777, |
|
"learning_rate": 9.7838390121575e-07, |
|
"loss": 0.5505, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1130996638426658, |
|
"grad_norm": 3.1201093196868896, |
|
"learning_rate": 9.778930030318488e-07, |
|
"loss": 0.5829, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.11414688295232012, |
|
"grad_norm": 2.6629204750061035, |
|
"learning_rate": 9.773967191081875e-07, |
|
"loss": 0.5925, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.11519410206197443, |
|
"grad_norm": 2.593073844909668, |
|
"learning_rate": 9.768950550377674e-07, |
|
"loss": 0.572, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.11624132117162873, |
|
"grad_norm": 4.5134687423706055, |
|
"learning_rate": 9.763880164742224e-07, |
|
"loss": 0.5106, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.11728854028128305, |
|
"grad_norm": 3.3710708618164062, |
|
"learning_rate": 9.758756091317557e-07, |
|
"loss": 0.567, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.11833575939093736, |
|
"grad_norm": 3.414686679840088, |
|
"learning_rate": 9.753578387850754e-07, |
|
"loss": 0.578, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.11938297850059168, |
|
"grad_norm": 2.6787045001983643, |
|
"learning_rate": 9.748347112693294e-07, |
|
"loss": 0.5587, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.120430197610246, |
|
"grad_norm": 2.505725860595703, |
|
"learning_rate": 9.743062324800395e-07, |
|
"loss": 0.5513, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12147741671990031, |
|
"grad_norm": 2.5358970165252686, |
|
"learning_rate": 9.737724083730354e-07, |
|
"loss": 0.5378, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.12252463582955461, |
|
"grad_norm": 1.6748542785644531, |
|
"learning_rate": 9.732332449643868e-07, |
|
"loss": 0.5062, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.12357185493920893, |
|
"grad_norm": 2.4574966430664062, |
|
"learning_rate": 9.726887483303364e-07, |
|
"loss": 0.5721, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.12461907404886324, |
|
"grad_norm": 2.737337589263916, |
|
"learning_rate": 9.721389246072307e-07, |
|
"loss": 0.5963, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.12566629315851754, |
|
"grad_norm": 2.453996181488037, |
|
"learning_rate": 9.715837799914517e-07, |
|
"loss": 0.5917, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.12671351226817187, |
|
"grad_norm": 2.9003748893737793, |
|
"learning_rate": 9.710233207393463e-07, |
|
"loss": 0.5603, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.12776073137782618, |
|
"grad_norm": 2.409175395965576, |
|
"learning_rate": 9.704575531671562e-07, |
|
"loss": 0.568, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.1288079504874805, |
|
"grad_norm": 3.183899402618408, |
|
"learning_rate": 9.698864836509463e-07, |
|
"loss": 0.5702, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.1298551695971348, |
|
"grad_norm": 2.7574760913848877, |
|
"learning_rate": 9.693101186265336e-07, |
|
"loss": 0.5394, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.1309023887067891, |
|
"grad_norm": 2.9319100379943848, |
|
"learning_rate": 9.687284645894139e-07, |
|
"loss": 0.5504, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.13194960781644344, |
|
"grad_norm": 2.8977279663085938, |
|
"learning_rate": 9.681415280946887e-07, |
|
"loss": 0.611, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.13299682692609774, |
|
"grad_norm": 1.9469819068908691, |
|
"learning_rate": 9.675493157569922e-07, |
|
"loss": 0.5621, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.13404404603575207, |
|
"grad_norm": 2.0829553604125977, |
|
"learning_rate": 9.669518342504155e-07, |
|
"loss": 0.5305, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.13509126514540637, |
|
"grad_norm": 3.0171096324920654, |
|
"learning_rate": 9.663490903084324e-07, |
|
"loss": 0.5666, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.1361384842550607, |
|
"grad_norm": 3.0453896522521973, |
|
"learning_rate": 9.657410907238224e-07, |
|
"loss": 0.5332, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.137185703364715, |
|
"grad_norm": 2.2059998512268066, |
|
"learning_rate": 9.651278423485958e-07, |
|
"loss": 0.5859, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.1382329224743693, |
|
"grad_norm": 2.076673746109009, |
|
"learning_rate": 9.645093520939146e-07, |
|
"loss": 0.5048, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.13928014158402363, |
|
"grad_norm": 1.7987829446792603, |
|
"learning_rate": 9.638856269300163e-07, |
|
"loss": 0.5501, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.14032736069367793, |
|
"grad_norm": 3.1706273555755615, |
|
"learning_rate": 9.63256673886134e-07, |
|
"loss": 0.5389, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.14137457980333226, |
|
"grad_norm": 2.9992752075195312, |
|
"learning_rate": 9.626225000504177e-07, |
|
"loss": 0.5517, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.14242179891298656, |
|
"grad_norm": 1.2536182403564453, |
|
"learning_rate": 9.619831125698552e-07, |
|
"loss": 0.5304, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.14346901802264087, |
|
"grad_norm": 2.491206645965576, |
|
"learning_rate": 9.6133851865019e-07, |
|
"loss": 0.5001, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.1445162371322952, |
|
"grad_norm": 2.180227518081665, |
|
"learning_rate": 9.606887255558417e-07, |
|
"loss": 0.5149, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.1455634562419495, |
|
"grad_norm": 1.546883463859558, |
|
"learning_rate": 9.60033740609823e-07, |
|
"loss": 0.5566, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.14661067535160383, |
|
"grad_norm": 2.402559757232666, |
|
"learning_rate": 9.593735711936567e-07, |
|
"loss": 0.5343, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.14765789446125813, |
|
"grad_norm": 4.94249153137207, |
|
"learning_rate": 9.587082247472948e-07, |
|
"loss": 0.516, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.14870511357091243, |
|
"grad_norm": 1.760003924369812, |
|
"learning_rate": 9.580377087690324e-07, |
|
"loss": 0.5395, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.14975233268056676, |
|
"grad_norm": 2.1215927600860596, |
|
"learning_rate": 9.573620308154238e-07, |
|
"loss": 0.55, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.15079955179022106, |
|
"grad_norm": 2.929760217666626, |
|
"learning_rate": 9.566811985011981e-07, |
|
"loss": 0.5571, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.1518467708998754, |
|
"grad_norm": 2.7724721431732178, |
|
"learning_rate": 9.559952194991726e-07, |
|
"loss": 0.5712, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.1528939900095297, |
|
"grad_norm": 2.270812749862671, |
|
"learning_rate": 9.55304101540166e-07, |
|
"loss": 0.5355, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.15394120911918402, |
|
"grad_norm": 2.3572235107421875, |
|
"learning_rate": 9.546078524129127e-07, |
|
"loss": 0.5595, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.15498842822883832, |
|
"grad_norm": 1.5402534008026123, |
|
"learning_rate": 9.539064799639735e-07, |
|
"loss": 0.5561, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.15603564733849262, |
|
"grad_norm": 3.2286136150360107, |
|
"learning_rate": 9.531999920976481e-07, |
|
"loss": 0.4951, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.15708286644814695, |
|
"grad_norm": 1.4825396537780762, |
|
"learning_rate": 9.524883967758858e-07, |
|
"loss": 0.5099, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.15813008555780125, |
|
"grad_norm": 1.649629831314087, |
|
"learning_rate": 9.517717020181953e-07, |
|
"loss": 0.5694, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.15917730466745558, |
|
"grad_norm": 1.8996721506118774, |
|
"learning_rate": 9.510499159015553e-07, |
|
"loss": 0.5364, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.16022452377710988, |
|
"grad_norm": 3.648730993270874, |
|
"learning_rate": 9.50323046560322e-07, |
|
"loss": 0.5276, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.16127174288676419, |
|
"grad_norm": 2.633986473083496, |
|
"learning_rate": 9.495911021861396e-07, |
|
"loss": 0.5399, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.16231896199641851, |
|
"grad_norm": 1.8254631757736206, |
|
"learning_rate": 9.488540910278455e-07, |
|
"loss": 0.5484, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.16336618110607282, |
|
"grad_norm": 2.676395893096924, |
|
"learning_rate": 9.481120213913794e-07, |
|
"loss": 0.5741, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.16441340021572715, |
|
"grad_norm": 3.6794283390045166, |
|
"learning_rate": 9.47364901639688e-07, |
|
"loss": 0.5481, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.16546061932538145, |
|
"grad_norm": 1.8362795114517212, |
|
"learning_rate": 9.466127401926326e-07, |
|
"loss": 0.5704, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.16650783843503575, |
|
"grad_norm": 2.256762742996216, |
|
"learning_rate": 9.458555455268924e-07, |
|
"loss": 0.5159, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.16755505754469008, |
|
"grad_norm": 2.6386005878448486, |
|
"learning_rate": 9.450933261758702e-07, |
|
"loss": 0.4916, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.16860227665434438, |
|
"grad_norm": 2.635512113571167, |
|
"learning_rate": 9.443260907295955e-07, |
|
"loss": 0.508, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.1696494957639987, |
|
"grad_norm": 1.6727428436279297, |
|
"learning_rate": 9.435538478346282e-07, |
|
"loss": 0.5282, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.170696714873653, |
|
"grad_norm": 2.1256072521209717, |
|
"learning_rate": 9.42776606193961e-07, |
|
"loss": 0.5878, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.17174393398330734, |
|
"grad_norm": 2.557060956954956, |
|
"learning_rate": 9.419943745669209e-07, |
|
"loss": 0.5392, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.17279115309296164, |
|
"grad_norm": 2.912794828414917, |
|
"learning_rate": 9.412071617690713e-07, |
|
"loss": 0.5631, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.17383837220261594, |
|
"grad_norm": 2.380751132965088, |
|
"learning_rate": 9.40414976672112e-07, |
|
"loss": 0.5518, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.17488559131227027, |
|
"grad_norm": 2.5645503997802734, |
|
"learning_rate": 9.396178282037795e-07, |
|
"loss": 0.5377, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.17593281042192457, |
|
"grad_norm": 2.270052433013916, |
|
"learning_rate": 9.388157253477459e-07, |
|
"loss": 0.524, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.1769800295315789, |
|
"grad_norm": 2.3046374320983887, |
|
"learning_rate": 9.380086771435187e-07, |
|
"loss": 0.5224, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.1780272486412332, |
|
"grad_norm": 1.9633408784866333, |
|
"learning_rate": 9.371966926863381e-07, |
|
"loss": 0.5241, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.1790744677508875, |
|
"grad_norm": 2.206256628036499, |
|
"learning_rate": 9.363797811270743e-07, |
|
"loss": 0.5599, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.18012168686054184, |
|
"grad_norm": 2.883242607116699, |
|
"learning_rate": 9.355579516721251e-07, |
|
"loss": 0.5472, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.18116890597019614, |
|
"grad_norm": 3.9055755138397217, |
|
"learning_rate": 9.34731213583312e-07, |
|
"loss": 0.5463, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.18221612507985047, |
|
"grad_norm": 2.9254720211029053, |
|
"learning_rate": 9.338995761777751e-07, |
|
"loss": 0.5385, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.18326334418950477, |
|
"grad_norm": 2.070220947265625, |
|
"learning_rate": 9.33063048827869e-07, |
|
"loss": 0.597, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.18431056329915907, |
|
"grad_norm": 2.241502285003662, |
|
"learning_rate": 9.322216409610566e-07, |
|
"loss": 0.4954, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.1853577824088134, |
|
"grad_norm": 2.7689974308013916, |
|
"learning_rate": 9.313753620598035e-07, |
|
"loss": 0.5536, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.1864050015184677, |
|
"grad_norm": 2.5464389324188232, |
|
"learning_rate": 9.3052422166147e-07, |
|
"loss": 0.5342, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.18745222062812203, |
|
"grad_norm": 1.727013111114502, |
|
"learning_rate": 9.296682293582049e-07, |
|
"loss": 0.5383, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.18849943973777633, |
|
"grad_norm": 4.623219966888428, |
|
"learning_rate": 9.288073947968364e-07, |
|
"loss": 0.5305, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.18954665884743066, |
|
"grad_norm": 1.5261229276657104, |
|
"learning_rate": 9.27941727678764e-07, |
|
"loss": 0.5235, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.19059387795708496, |
|
"grad_norm": 1.9866268634796143, |
|
"learning_rate": 9.270712377598491e-07, |
|
"loss": 0.5217, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.19164109706673926, |
|
"grad_norm": 3.0393967628479004, |
|
"learning_rate": 9.261959348503046e-07, |
|
"loss": 0.5241, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.1926883161763936, |
|
"grad_norm": 2.8217124938964844, |
|
"learning_rate": 9.253158288145848e-07, |
|
"loss": 0.5713, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.1937355352860479, |
|
"grad_norm": 2.327930450439453, |
|
"learning_rate": 9.24430929571274e-07, |
|
"loss": 0.5191, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.19478275439570222, |
|
"grad_norm": 2.090432643890381, |
|
"learning_rate": 9.235412470929748e-07, |
|
"loss": 0.5285, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.19582997350535652, |
|
"grad_norm": 2.427619457244873, |
|
"learning_rate": 9.226467914061962e-07, |
|
"loss": 0.5157, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.19687719261501083, |
|
"grad_norm": 3.4102041721343994, |
|
"learning_rate": 9.217475725912391e-07, |
|
"loss": 0.52, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.19792441172466516, |
|
"grad_norm": 1.7967109680175781, |
|
"learning_rate": 9.208436007820848e-07, |
|
"loss": 0.514, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.19897163083431946, |
|
"grad_norm": 2.5887088775634766, |
|
"learning_rate": 9.19934886166279e-07, |
|
"loss": 0.4798, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.2000188499439738, |
|
"grad_norm": 2.08363676071167, |
|
"learning_rate": 9.190214389848181e-07, |
|
"loss": 0.5348, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.2010660690536281, |
|
"grad_norm": 2.4554569721221924, |
|
"learning_rate": 9.18103269532033e-07, |
|
"loss": 0.4976, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.20211328816328242, |
|
"grad_norm": 2.604750633239746, |
|
"learning_rate": 9.171803881554736e-07, |
|
"loss": 0.5048, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.20316050727293672, |
|
"grad_norm": 1.9831663370132446, |
|
"learning_rate": 9.162528052557925e-07, |
|
"loss": 0.5618, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.20420772638259102, |
|
"grad_norm": 2.6448137760162354, |
|
"learning_rate": 9.153205312866265e-07, |
|
"loss": 0.5382, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.20525494549224535, |
|
"grad_norm": 2.27817964553833, |
|
"learning_rate": 9.143835767544805e-07, |
|
"loss": 0.5189, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.20630216460189965, |
|
"grad_norm": 1.8295369148254395, |
|
"learning_rate": 9.134419522186075e-07, |
|
"loss": 0.5083, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.20734938371155398, |
|
"grad_norm": 3.7082695960998535, |
|
"learning_rate": 9.124956682908908e-07, |
|
"loss": 0.4839, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.20839660282120828, |
|
"grad_norm": 2.17672061920166, |
|
"learning_rate": 9.115447356357238e-07, |
|
"loss": 0.5203, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.20944382193086258, |
|
"grad_norm": 2.759127378463745, |
|
"learning_rate": 9.105891649698898e-07, |
|
"loss": 0.5339, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2104910410405169, |
|
"grad_norm": 2.4461498260498047, |
|
"learning_rate": 9.096289670624416e-07, |
|
"loss": 0.5536, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.21153826015017121, |
|
"grad_norm": 2.8688385486602783, |
|
"learning_rate": 9.086641527345796e-07, |
|
"loss": 0.5266, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.21258547925982554, |
|
"grad_norm": 2.589167356491089, |
|
"learning_rate": 9.076947328595306e-07, |
|
"loss": 0.5031, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.21363269836947985, |
|
"grad_norm": 3.033956289291382, |
|
"learning_rate": 9.067207183624243e-07, |
|
"loss": 0.5288, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.21467991747913415, |
|
"grad_norm": 2.5122592449188232, |
|
"learning_rate": 9.057421202201714e-07, |
|
"loss": 0.5002, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.21572713658878848, |
|
"grad_norm": 2.099766731262207, |
|
"learning_rate": 9.047589494613381e-07, |
|
"loss": 0.5389, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.21677435569844278, |
|
"grad_norm": 2.65134596824646, |
|
"learning_rate": 9.037712171660241e-07, |
|
"loss": 0.5537, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.2178215748080971, |
|
"grad_norm": 2.301417589187622, |
|
"learning_rate": 9.027789344657357e-07, |
|
"loss": 0.5554, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.2188687939177514, |
|
"grad_norm": 2.6696295738220215, |
|
"learning_rate": 9.017821125432612e-07, |
|
"loss": 0.5191, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.21991601302740574, |
|
"grad_norm": 2.455559015274048, |
|
"learning_rate": 9.007807626325455e-07, |
|
"loss": 0.5053, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.22096323213706004, |
|
"grad_norm": 2.676161289215088, |
|
"learning_rate": 8.997748960185622e-07, |
|
"loss": 0.518, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.22201045124671434, |
|
"grad_norm": 2.6200263500213623, |
|
"learning_rate": 8.987645240371873e-07, |
|
"loss": 0.4884, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.22305767035636867, |
|
"grad_norm": 3.8255863189697266, |
|
"learning_rate": 8.977496580750712e-07, |
|
"loss": 0.5348, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.22410488946602297, |
|
"grad_norm": 2.0892577171325684, |
|
"learning_rate": 8.967303095695105e-07, |
|
"loss": 0.5178, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.2251521085756773, |
|
"grad_norm": 2.40419864654541, |
|
"learning_rate": 8.957064900083187e-07, |
|
"loss": 0.584, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.2261993276853316, |
|
"grad_norm": 3.042703628540039, |
|
"learning_rate": 8.946782109296973e-07, |
|
"loss": 0.5267, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.2272465467949859, |
|
"grad_norm": 1.6234790086746216, |
|
"learning_rate": 8.936454839221054e-07, |
|
"loss": 0.5217, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.22829376590464023, |
|
"grad_norm": 1.706650972366333, |
|
"learning_rate": 8.926083206241291e-07, |
|
"loss": 0.5242, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.22934098501429453, |
|
"grad_norm": 4.158198833465576, |
|
"learning_rate": 8.915667327243506e-07, |
|
"loss": 0.524, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.23038820412394886, |
|
"grad_norm": 2.2484548091888428, |
|
"learning_rate": 8.905207319612163e-07, |
|
"loss": 0.5347, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.23143542323360317, |
|
"grad_norm": 2.990169048309326, |
|
"learning_rate": 8.894703301229043e-07, |
|
"loss": 0.5408, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.23248264234325747, |
|
"grad_norm": 3.9766592979431152, |
|
"learning_rate": 8.884155390471919e-07, |
|
"loss": 0.5046, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.2335298614529118, |
|
"grad_norm": 2.5463485717773438, |
|
"learning_rate": 8.873563706213221e-07, |
|
"loss": 0.4881, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.2345770805625661, |
|
"grad_norm": 2.7277047634124756, |
|
"learning_rate": 8.862928367818696e-07, |
|
"loss": 0.5228, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.23562429967222043, |
|
"grad_norm": 1.9528217315673828, |
|
"learning_rate": 8.852249495146063e-07, |
|
"loss": 0.5056, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.23667151878187473, |
|
"grad_norm": 2.527414083480835, |
|
"learning_rate": 8.841527208543658e-07, |
|
"loss": 0.5186, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.23771873789152906, |
|
"grad_norm": 1.9525986909866333, |
|
"learning_rate": 8.830761628849087e-07, |
|
"loss": 0.5195, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.23876595700118336, |
|
"grad_norm": 1.6230095624923706, |
|
"learning_rate": 8.819952877387855e-07, |
|
"loss": 0.4834, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.23981317611083766, |
|
"grad_norm": 2.2290198802948, |
|
"learning_rate": 8.809101075972005e-07, |
|
"loss": 0.5207, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.240860395220492, |
|
"grad_norm": 3.419203996658325, |
|
"learning_rate": 8.798206346898743e-07, |
|
"loss": 0.5064, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.2419076143301463, |
|
"grad_norm": 2.360508441925049, |
|
"learning_rate": 8.787268812949054e-07, |
|
"loss": 0.5011, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.24295483343980062, |
|
"grad_norm": 1.8023535013198853, |
|
"learning_rate": 8.77628859738633e-07, |
|
"loss": 0.5099, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.24400205254945492, |
|
"grad_norm": 1.9575679302215576, |
|
"learning_rate": 8.765265823954972e-07, |
|
"loss": 0.5361, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.24504927165910922, |
|
"grad_norm": 1.5841313600540161, |
|
"learning_rate": 8.754200616879001e-07, |
|
"loss": 0.541, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.24609649076876355, |
|
"grad_norm": 2.8605728149414062, |
|
"learning_rate": 8.743093100860648e-07, |
|
"loss": 0.5541, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.24714370987841786, |
|
"grad_norm": 1.696733832359314, |
|
"learning_rate": 8.731943401078961e-07, |
|
"loss": 0.511, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.24819092898807218, |
|
"grad_norm": 2.1618356704711914, |
|
"learning_rate": 8.720751643188389e-07, |
|
"loss": 0.5066, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.2492381480977265, |
|
"grad_norm": 2.721067428588867, |
|
"learning_rate": 8.709517953317365e-07, |
|
"loss": 0.5398, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.2502853672073808, |
|
"grad_norm": 1.8457568883895874, |
|
"learning_rate": 8.698242458066882e-07, |
|
"loss": 0.4879, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.2513325863170351, |
|
"grad_norm": 2.435941696166992, |
|
"learning_rate": 8.686925284509077e-07, |
|
"loss": 0.531, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.2523798054266894, |
|
"grad_norm": 2.617920160293579, |
|
"learning_rate": 8.675566560185786e-07, |
|
"loss": 0.5189, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.25342702453634375, |
|
"grad_norm": 2.538632869720459, |
|
"learning_rate": 8.664166413107109e-07, |
|
"loss": 0.5433, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.2544742436459981, |
|
"grad_norm": 2.3944451808929443, |
|
"learning_rate": 8.65272497174998e-07, |
|
"loss": 0.5401, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.25552146275565235, |
|
"grad_norm": 3.6203765869140625, |
|
"learning_rate": 8.641242365056705e-07, |
|
"loss": 0.544, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.2565686818653067, |
|
"grad_norm": 2.866250991821289, |
|
"learning_rate": 8.629718722433507e-07, |
|
"loss": 0.5357, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.257615900974961, |
|
"grad_norm": 3.3872838020324707, |
|
"learning_rate": 8.618154173749088e-07, |
|
"loss": 0.5261, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.2586631200846153, |
|
"grad_norm": 2.269967794418335, |
|
"learning_rate": 8.606548849333138e-07, |
|
"loss": 0.5128, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.2597103391942696, |
|
"grad_norm": 2.1335697174072266, |
|
"learning_rate": 8.594902879974888e-07, |
|
"loss": 0.5645, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.26075755830392394, |
|
"grad_norm": 2.443239212036133, |
|
"learning_rate": 8.583216396921624e-07, |
|
"loss": 0.4806, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.2618047774135782, |
|
"grad_norm": 2.713833808898926, |
|
"learning_rate": 8.571489531877214e-07, |
|
"loss": 0.5271, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.26285199652323255, |
|
"grad_norm": 3.485182046890259, |
|
"learning_rate": 8.559722417000619e-07, |
|
"loss": 0.4962, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.2638992156328869, |
|
"grad_norm": 2.306403160095215, |
|
"learning_rate": 8.547915184904409e-07, |
|
"loss": 0.5122, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.2649464347425412, |
|
"grad_norm": 2.6151928901672363, |
|
"learning_rate": 8.536067968653261e-07, |
|
"loss": 0.5316, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.2659936538521955, |
|
"grad_norm": 2.3466389179229736, |
|
"learning_rate": 8.524180901762469e-07, |
|
"loss": 0.4991, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.2670408729618498, |
|
"grad_norm": 2.0926601886749268, |
|
"learning_rate": 8.512254118196429e-07, |
|
"loss": 0.5254, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.26808809207150414, |
|
"grad_norm": 1.9708478450775146, |
|
"learning_rate": 8.500287752367142e-07, |
|
"loss": 0.507, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.2691353111811584, |
|
"grad_norm": 2.028843879699707, |
|
"learning_rate": 8.48828193913268e-07, |
|
"loss": 0.5066, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.27018253029081274, |
|
"grad_norm": 2.9337289333343506, |
|
"learning_rate": 8.47623681379569e-07, |
|
"loss": 0.5023, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.27122974940046707, |
|
"grad_norm": 2.8608200550079346, |
|
"learning_rate": 8.464152512101848e-07, |
|
"loss": 0.5417, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.2722769685101214, |
|
"grad_norm": 3.0925405025482178, |
|
"learning_rate": 8.452029170238344e-07, |
|
"loss": 0.5415, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.27332418761977567, |
|
"grad_norm": 1.9558321237564087, |
|
"learning_rate": 8.439866924832338e-07, |
|
"loss": 0.519, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.27437140672943, |
|
"grad_norm": 1.5545213222503662, |
|
"learning_rate": 8.427665912949425e-07, |
|
"loss": 0.5441, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.27541862583908433, |
|
"grad_norm": 3.6202712059020996, |
|
"learning_rate": 8.415426272092089e-07, |
|
"loss": 0.5559, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.2764658449487386, |
|
"grad_norm": 1.8004056215286255, |
|
"learning_rate": 8.403148140198151e-07, |
|
"loss": 0.5034, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.27751306405839293, |
|
"grad_norm": 2.5597338676452637, |
|
"learning_rate": 8.390831655639223e-07, |
|
"loss": 0.5294, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.27856028316804726, |
|
"grad_norm": 2.014400005340576, |
|
"learning_rate": 8.378476957219134e-07, |
|
"loss": 0.5663, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.27960750227770154, |
|
"grad_norm": 2.069840669631958, |
|
"learning_rate": 8.366084184172377e-07, |
|
"loss": 0.5007, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.28065472138735587, |
|
"grad_norm": 5.621069431304932, |
|
"learning_rate": 8.353653476162543e-07, |
|
"loss": 0.5263, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.2817019404970102, |
|
"grad_norm": 3.1065540313720703, |
|
"learning_rate": 8.341184973280732e-07, |
|
"loss": 0.5048, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.2827491596066645, |
|
"grad_norm": 2.579742431640625, |
|
"learning_rate": 8.328678816043988e-07, |
|
"loss": 0.5272, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.2837963787163188, |
|
"grad_norm": 2.476778030395508, |
|
"learning_rate": 8.31613514539371e-07, |
|
"loss": 0.4944, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.2848435978259731, |
|
"grad_norm": 2.7026314735412598, |
|
"learning_rate": 8.303554102694065e-07, |
|
"loss": 0.5257, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.28589081693562746, |
|
"grad_norm": 2.1597368717193604, |
|
"learning_rate": 8.290935829730391e-07, |
|
"loss": 0.5282, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.28693803604528173, |
|
"grad_norm": 2.447305202484131, |
|
"learning_rate": 8.278280468707606e-07, |
|
"loss": 0.5295, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.28798525515493606, |
|
"grad_norm": 2.806995391845703, |
|
"learning_rate": 8.265588162248597e-07, |
|
"loss": 0.4933, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.2890324742645904, |
|
"grad_norm": 2.1765849590301514, |
|
"learning_rate": 8.252859053392622e-07, |
|
"loss": 0.5486, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.2900796933742447, |
|
"grad_norm": 2.122382640838623, |
|
"learning_rate": 8.240093285593692e-07, |
|
"loss": 0.5255, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.291126912483899, |
|
"grad_norm": 2.136657476425171, |
|
"learning_rate": 8.22729100271895e-07, |
|
"loss": 0.5214, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.2921741315935533, |
|
"grad_norm": 2.033987522125244, |
|
"learning_rate": 8.214452349047065e-07, |
|
"loss": 0.5065, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.29322135070320765, |
|
"grad_norm": 3.346703290939331, |
|
"learning_rate": 8.20157746926659e-07, |
|
"loss": 0.5349, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.2942685698128619, |
|
"grad_norm": 2.63242244720459, |
|
"learning_rate": 8.188666508474335e-07, |
|
"loss": 0.5264, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.29531578892251625, |
|
"grad_norm": 2.475911855697632, |
|
"learning_rate": 8.175719612173741e-07, |
|
"loss": 0.5186, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.2963630080321706, |
|
"grad_norm": 1.5967457294464111, |
|
"learning_rate": 8.162736926273231e-07, |
|
"loss": 0.5321, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.29741022714182486, |
|
"grad_norm": 1.6950793266296387, |
|
"learning_rate": 8.149718597084565e-07, |
|
"loss": 0.5028, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.2984574462514792, |
|
"grad_norm": 1.8821123838424683, |
|
"learning_rate": 8.136664771321198e-07, |
|
"loss": 0.5147, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.2995046653611335, |
|
"grad_norm": 3.8432750701904297, |
|
"learning_rate": 8.123575596096624e-07, |
|
"loss": 0.5055, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.30055188447078784, |
|
"grad_norm": 2.2065136432647705, |
|
"learning_rate": 8.110451218922711e-07, |
|
"loss": 0.4804, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.3015991035804421, |
|
"grad_norm": 3.215104103088379, |
|
"learning_rate": 8.097291787708052e-07, |
|
"loss": 0.508, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.30264632269009645, |
|
"grad_norm": 2.6659111976623535, |
|
"learning_rate": 8.084097450756286e-07, |
|
"loss": 0.5058, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.3036935417997508, |
|
"grad_norm": 3.1594624519348145, |
|
"learning_rate": 8.070868356764431e-07, |
|
"loss": 0.4819, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.30474076090940505, |
|
"grad_norm": 3.2502479553222656, |
|
"learning_rate": 8.05760465482121e-07, |
|
"loss": 0.5132, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.3057879800190594, |
|
"grad_norm": 2.3569111824035645, |
|
"learning_rate": 8.044306494405372e-07, |
|
"loss": 0.4989, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.3068351991287137, |
|
"grad_norm": 2.7516555786132812, |
|
"learning_rate": 8.030974025384e-07, |
|
"loss": 0.4982, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.30788241823836804, |
|
"grad_norm": 2.388401508331299, |
|
"learning_rate": 8.017607398010829e-07, |
|
"loss": 0.492, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.3089296373480223, |
|
"grad_norm": 2.49920392036438, |
|
"learning_rate": 8.004206762924548e-07, |
|
"loss": 0.4729, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.30997685645767664, |
|
"grad_norm": 2.528714179992676, |
|
"learning_rate": 7.99077227114711e-07, |
|
"loss": 0.5229, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.31102407556733097, |
|
"grad_norm": 2.0866329669952393, |
|
"learning_rate": 7.977304074082021e-07, |
|
"loss": 0.483, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.31207129467698524, |
|
"grad_norm": 3.1670796871185303, |
|
"learning_rate": 7.963802323512638e-07, |
|
"loss": 0.4816, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.3131185137866396, |
|
"grad_norm": 1.9715406894683838, |
|
"learning_rate": 7.950267171600458e-07, |
|
"loss": 0.4666, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.3141657328962939, |
|
"grad_norm": 1.6176679134368896, |
|
"learning_rate": 7.936698770883404e-07, |
|
"loss": 0.4886, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.3152129520059482, |
|
"grad_norm": 2.4239096641540527, |
|
"learning_rate": 7.923097274274103e-07, |
|
"loss": 0.5085, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.3162601711156025, |
|
"grad_norm": 1.8292428255081177, |
|
"learning_rate": 7.909462835058169e-07, |
|
"loss": 0.538, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.31730739022525684, |
|
"grad_norm": 2.2372076511383057, |
|
"learning_rate": 7.895795606892466e-07, |
|
"loss": 0.5099, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.31835460933491116, |
|
"grad_norm": 1.9392811059951782, |
|
"learning_rate": 7.882095743803386e-07, |
|
"loss": 0.4947, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.31940182844456544, |
|
"grad_norm": 2.645183801651001, |
|
"learning_rate": 7.868363400185106e-07, |
|
"loss": 0.5012, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.32044904755421977, |
|
"grad_norm": 3.2452821731567383, |
|
"learning_rate": 7.85459873079785e-07, |
|
"loss": 0.4696, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.3214962666638741, |
|
"grad_norm": 1.310027003288269, |
|
"learning_rate": 7.84080189076615e-07, |
|
"loss": 0.5183, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.32254348577352837, |
|
"grad_norm": 2.6369211673736572, |
|
"learning_rate": 7.826973035577091e-07, |
|
"loss": 0.5135, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.3235907048831827, |
|
"grad_norm": 2.9246723651885986, |
|
"learning_rate": 7.813112321078559e-07, |
|
"loss": 0.527, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.32463792399283703, |
|
"grad_norm": 3.309020519256592, |
|
"learning_rate": 7.799219903477489e-07, |
|
"loss": 0.5322, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.32568514310249136, |
|
"grad_norm": 2.4480512142181396, |
|
"learning_rate": 7.785295939338105e-07, |
|
"loss": 0.5234, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.32673236221214563, |
|
"grad_norm": 1.7909550666809082, |
|
"learning_rate": 7.771340585580149e-07, |
|
"loss": 0.4938, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.32777958132179996, |
|
"grad_norm": 2.6975667476654053, |
|
"learning_rate": 7.757353999477114e-07, |
|
"loss": 0.491, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.3288268004314543, |
|
"grad_norm": 2.4480390548706055, |
|
"learning_rate": 7.743336338654483e-07, |
|
"loss": 0.538, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.32987401954110857, |
|
"grad_norm": 1.8292025327682495, |
|
"learning_rate": 7.729287761087935e-07, |
|
"loss": 0.4906, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.3309212386507629, |
|
"grad_norm": 1.5502568483352661, |
|
"learning_rate": 7.715208425101576e-07, |
|
"loss": 0.459, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.3319684577604172, |
|
"grad_norm": 2.6698973178863525, |
|
"learning_rate": 7.701098489366156e-07, |
|
"loss": 0.5086, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.3330156768700715, |
|
"grad_norm": 2.4431324005126953, |
|
"learning_rate": 7.686958112897271e-07, |
|
"loss": 0.4843, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.3340628959797258, |
|
"grad_norm": 2.875575065612793, |
|
"learning_rate": 7.67278745505358e-07, |
|
"loss": 0.5171, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.33511011508938016, |
|
"grad_norm": 2.196960210800171, |
|
"learning_rate": 7.658586675535005e-07, |
|
"loss": 0.5026, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.3361573341990345, |
|
"grad_norm": 2.801039457321167, |
|
"learning_rate": 7.644355934380933e-07, |
|
"loss": 0.5175, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.33720455330868876, |
|
"grad_norm": 2.4252429008483887, |
|
"learning_rate": 7.630095391968407e-07, |
|
"loss": 0.492, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.3382517724183431, |
|
"grad_norm": 1.9080466032028198, |
|
"learning_rate": 7.615805209010334e-07, |
|
"loss": 0.5203, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.3392989915279974, |
|
"grad_norm": 1.8371050357818604, |
|
"learning_rate": 7.601485546553647e-07, |
|
"loss": 0.5028, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.3403462106376517, |
|
"grad_norm": 3.5394959449768066, |
|
"learning_rate": 7.587136565977522e-07, |
|
"loss": 0.5203, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.341393429747306, |
|
"grad_norm": 2.381826400756836, |
|
"learning_rate": 7.572758428991532e-07, |
|
"loss": 0.5254, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.34244064885696035, |
|
"grad_norm": 1.7615987062454224, |
|
"learning_rate": 7.55835129763384e-07, |
|
"loss": 0.5091, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.3434878679666147, |
|
"grad_norm": 2.329334020614624, |
|
"learning_rate": 7.543915334269365e-07, |
|
"loss": 0.5004, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.34453508707626895, |
|
"grad_norm": 2.9679040908813477, |
|
"learning_rate": 7.529450701587963e-07, |
|
"loss": 0.5114, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.3455823061859233, |
|
"grad_norm": 3.3162288665771484, |
|
"learning_rate": 7.514957562602582e-07, |
|
"loss": 0.5055, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.3466295252955776, |
|
"grad_norm": 2.0709986686706543, |
|
"learning_rate": 7.500436080647428e-07, |
|
"loss": 0.5574, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.3476767444052319, |
|
"grad_norm": 2.1400296688079834, |
|
"learning_rate": 7.485886419376126e-07, |
|
"loss": 0.5777, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.3487239635148862, |
|
"grad_norm": 2.4479362964630127, |
|
"learning_rate": 7.471308742759879e-07, |
|
"loss": 0.5378, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.34977118262454054, |
|
"grad_norm": 2.2012875080108643, |
|
"learning_rate": 7.456703215085609e-07, |
|
"loss": 0.4941, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.3508184017341948, |
|
"grad_norm": 2.5233943462371826, |
|
"learning_rate": 7.44207000095412e-07, |
|
"loss": 0.547, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.35186562084384915, |
|
"grad_norm": 2.050294876098633, |
|
"learning_rate": 7.427409265278235e-07, |
|
"loss": 0.5326, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.3529128399535035, |
|
"grad_norm": 1.9416810274124146, |
|
"learning_rate": 7.412721173280931e-07, |
|
"loss": 0.5373, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.3539600590631578, |
|
"grad_norm": 2.4550209045410156, |
|
"learning_rate": 7.398005890493493e-07, |
|
"loss": 0.5025, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.3550072781728121, |
|
"grad_norm": 2.1860315799713135, |
|
"learning_rate": 7.383263582753633e-07, |
|
"loss": 0.4961, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.3560544972824664, |
|
"grad_norm": 3.3393681049346924, |
|
"learning_rate": 7.368494416203632e-07, |
|
"loss": 0.5014, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.35710171639212074, |
|
"grad_norm": 2.2855758666992188, |
|
"learning_rate": 7.353698557288462e-07, |
|
"loss": 0.5179, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.358148935501775, |
|
"grad_norm": 2.719910144805908, |
|
"learning_rate": 7.338876172753913e-07, |
|
"loss": 0.5151, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.35919615461142934, |
|
"grad_norm": 2.3122212886810303, |
|
"learning_rate": 7.324027429644709e-07, |
|
"loss": 0.5075, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.36024337372108367, |
|
"grad_norm": 2.5901198387145996, |
|
"learning_rate": 7.309152495302631e-07, |
|
"loss": 0.5185, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.361290592830738, |
|
"grad_norm": 2.749903440475464, |
|
"learning_rate": 7.294251537364629e-07, |
|
"loss": 0.4728, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.3623378119403923, |
|
"grad_norm": 2.453977108001709, |
|
"learning_rate": 7.279324723760932e-07, |
|
"loss": 0.5197, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.3633850310500466, |
|
"grad_norm": 3.2406835556030273, |
|
"learning_rate": 7.264372222713157e-07, |
|
"loss": 0.4856, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.36443225015970093, |
|
"grad_norm": 2.1802427768707275, |
|
"learning_rate": 7.249394202732414e-07, |
|
"loss": 0.4996, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.3654794692693552, |
|
"grad_norm": 1.560670256614685, |
|
"learning_rate": 7.234390832617399e-07, |
|
"loss": 0.5032, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.36652668837900954, |
|
"grad_norm": 2.8153815269470215, |
|
"learning_rate": 7.219362281452504e-07, |
|
"loss": 0.4882, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.36757390748866386, |
|
"grad_norm": 3.205367088317871, |
|
"learning_rate": 7.204308718605906e-07, |
|
"loss": 0.5232, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.36862112659831814, |
|
"grad_norm": 1.6098523139953613, |
|
"learning_rate": 7.189230313727651e-07, |
|
"loss": 0.488, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.36966834570797247, |
|
"grad_norm": 2.2674808502197266, |
|
"learning_rate": 7.174127236747756e-07, |
|
"loss": 0.5026, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.3707155648176268, |
|
"grad_norm": 2.0923283100128174, |
|
"learning_rate": 7.158999657874283e-07, |
|
"loss": 0.5292, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.3717627839272811, |
|
"grad_norm": 2.078521251678467, |
|
"learning_rate": 7.143847747591423e-07, |
|
"loss": 0.5002, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.3728100030369354, |
|
"grad_norm": 2.299473285675049, |
|
"learning_rate": 7.128671676657579e-07, |
|
"loss": 0.5132, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.37385722214658973, |
|
"grad_norm": 1.3978760242462158, |
|
"learning_rate": 7.113471616103441e-07, |
|
"loss": 0.5182, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.37490444125624406, |
|
"grad_norm": 2.559293746948242, |
|
"learning_rate": 7.098247737230052e-07, |
|
"loss": 0.5202, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.37595166036589833, |
|
"grad_norm": 2.457498788833618, |
|
"learning_rate": 7.083000211606881e-07, |
|
"loss": 0.4946, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.37699887947555266, |
|
"grad_norm": 1.9849262237548828, |
|
"learning_rate": 7.067729211069892e-07, |
|
"loss": 0.4932, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.378046098585207, |
|
"grad_norm": 2.242328405380249, |
|
"learning_rate": 7.05243490771961e-07, |
|
"loss": 0.4853, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.3790933176948613, |
|
"grad_norm": 4.18756103515625, |
|
"learning_rate": 7.037117473919169e-07, |
|
"loss": 0.5271, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.3801405368045156, |
|
"grad_norm": 2.454249382019043, |
|
"learning_rate": 7.021777082292384e-07, |
|
"loss": 0.5208, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.3811877559141699, |
|
"grad_norm": 1.5989599227905273, |
|
"learning_rate": 7.006413905721796e-07, |
|
"loss": 0.5252, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.38223497502382425, |
|
"grad_norm": 3.1384224891662598, |
|
"learning_rate": 6.991028117346727e-07, |
|
"loss": 0.5231, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.3832821941334785, |
|
"grad_norm": 3.674887180328369, |
|
"learning_rate": 6.975619890561331e-07, |
|
"loss": 0.5338, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.38432941324313286, |
|
"grad_norm": 2.8714184761047363, |
|
"learning_rate": 6.960189399012635e-07, |
|
"loss": 0.4667, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.3853766323527872, |
|
"grad_norm": 2.0271899700164795, |
|
"learning_rate": 6.944736816598585e-07, |
|
"loss": 0.5439, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.38642385146244146, |
|
"grad_norm": 2.3302154541015625, |
|
"learning_rate": 6.929262317466087e-07, |
|
"loss": 0.5085, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.3874710705720958, |
|
"grad_norm": 1.89630126953125, |
|
"learning_rate": 6.913766076009042e-07, |
|
"loss": 0.489, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.3885182896817501, |
|
"grad_norm": 3.864342212677002, |
|
"learning_rate": 6.898248266866383e-07, |
|
"loss": 0.4782, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.38956550879140445, |
|
"grad_norm": 3.6760518550872803, |
|
"learning_rate": 6.882709064920104e-07, |
|
"loss": 0.5387, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.3906127279010587, |
|
"grad_norm": 2.225639581680298, |
|
"learning_rate": 6.867148645293292e-07, |
|
"loss": 0.5417, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.39165994701071305, |
|
"grad_norm": 1.6425765752792358, |
|
"learning_rate": 6.85156718334815e-07, |
|
"loss": 0.501, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.3927071661203674, |
|
"grad_norm": 2.095388650894165, |
|
"learning_rate": 6.835964854684027e-07, |
|
"loss": 0.5244, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.39375438523002165, |
|
"grad_norm": 1.9956177473068237, |
|
"learning_rate": 6.820341835135434e-07, |
|
"loss": 0.4862, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.394801604339676, |
|
"grad_norm": 2.3689606189727783, |
|
"learning_rate": 6.804698300770058e-07, |
|
"loss": 0.5174, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.3958488234493303, |
|
"grad_norm": 2.4154350757598877, |
|
"learning_rate": 6.789034427886788e-07, |
|
"loss": 0.5232, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.39689604255898464, |
|
"grad_norm": 2.841860055923462, |
|
"learning_rate": 6.773350393013725e-07, |
|
"loss": 0.4952, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.3979432616686389, |
|
"grad_norm": 1.6685402393341064, |
|
"learning_rate": 6.757646372906183e-07, |
|
"loss": 0.5136, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.39899048077829324, |
|
"grad_norm": 2.3947384357452393, |
|
"learning_rate": 6.741922544544716e-07, |
|
"loss": 0.4728, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.4000376998879476, |
|
"grad_norm": 1.9924613237380981, |
|
"learning_rate": 6.726179085133102e-07, |
|
"loss": 0.5101, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.40108491899760185, |
|
"grad_norm": 2.3830676078796387, |
|
"learning_rate": 6.710416172096361e-07, |
|
"loss": 0.489, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.4021321381072562, |
|
"grad_norm": 2.6001055240631104, |
|
"learning_rate": 6.69463398307875e-07, |
|
"loss": 0.5337, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.4031793572169105, |
|
"grad_norm": 2.329277753829956, |
|
"learning_rate": 6.678832695941763e-07, |
|
"loss": 0.469, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.40422657632656483, |
|
"grad_norm": 2.2831122875213623, |
|
"learning_rate": 6.663012488762123e-07, |
|
"loss": 0.5279, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.4052737954362191, |
|
"grad_norm": 2.813821315765381, |
|
"learning_rate": 6.647173539829778e-07, |
|
"loss": 0.4873, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.40632101454587344, |
|
"grad_norm": 2.3835694789886475, |
|
"learning_rate": 6.631316027645892e-07, |
|
"loss": 0.4991, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.40736823365552777, |
|
"grad_norm": 2.7960257530212402, |
|
"learning_rate": 6.615440130920833e-07, |
|
"loss": 0.5366, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.40841545276518204, |
|
"grad_norm": 1.9220885038375854, |
|
"learning_rate": 6.599546028572153e-07, |
|
"loss": 0.5111, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.40946267187483637, |
|
"grad_norm": 2.636683464050293, |
|
"learning_rate": 6.583633899722587e-07, |
|
"loss": 0.5058, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.4105098909844907, |
|
"grad_norm": 2.0583505630493164, |
|
"learning_rate": 6.567703923698013e-07, |
|
"loss": 0.4796, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.411557110094145, |
|
"grad_norm": 3.092818021774292, |
|
"learning_rate": 6.551756280025453e-07, |
|
"loss": 0.5181, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.4126043292037993, |
|
"grad_norm": 2.689857006072998, |
|
"learning_rate": 6.535791148431031e-07, |
|
"loss": 0.5424, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.41365154831345363, |
|
"grad_norm": 1.4727122783660889, |
|
"learning_rate": 6.519808708837958e-07, |
|
"loss": 0.5257, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.41469876742310796, |
|
"grad_norm": 2.4704394340515137, |
|
"learning_rate": 6.503809141364506e-07, |
|
"loss": 0.5043, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.41574598653276224, |
|
"grad_norm": 2.2205686569213867, |
|
"learning_rate": 6.487792626321969e-07, |
|
"loss": 0.4732, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.41679320564241656, |
|
"grad_norm": 4.539642333984375, |
|
"learning_rate": 6.471759344212637e-07, |
|
"loss": 0.5028, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.4178404247520709, |
|
"grad_norm": 3.22900652885437, |
|
"learning_rate": 6.455709475727764e-07, |
|
"loss": 0.4802, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.41888764386172517, |
|
"grad_norm": 1.7866666316986084, |
|
"learning_rate": 6.439643201745524e-07, |
|
"loss": 0.4677, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.4199348629713795, |
|
"grad_norm": 1.5298930406570435, |
|
"learning_rate": 6.423560703328981e-07, |
|
"loss": 0.4663, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.4209820820810338, |
|
"grad_norm": 2.7381436824798584, |
|
"learning_rate": 6.407462161724042e-07, |
|
"loss": 0.5032, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.42202930119068816, |
|
"grad_norm": 1.915801763534546, |
|
"learning_rate": 6.391347758357418e-07, |
|
"loss": 0.4876, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.42307652030034243, |
|
"grad_norm": 2.128645658493042, |
|
"learning_rate": 6.375217674834578e-07, |
|
"loss": 0.4947, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.42412373940999676, |
|
"grad_norm": 2.3809661865234375, |
|
"learning_rate": 6.359072092937702e-07, |
|
"loss": 0.5207, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.4251709585196511, |
|
"grad_norm": 2.089869976043701, |
|
"learning_rate": 6.342911194623636e-07, |
|
"loss": 0.5179, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.42621817762930536, |
|
"grad_norm": 2.531280040740967, |
|
"learning_rate": 6.326735162021832e-07, |
|
"loss": 0.5003, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.4272653967389597, |
|
"grad_norm": 1.5095371007919312, |
|
"learning_rate": 6.310544177432308e-07, |
|
"loss": 0.475, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.428312615848614, |
|
"grad_norm": 3.487618923187256, |
|
"learning_rate": 6.294338423323584e-07, |
|
"loss": 0.5382, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.4293598349582683, |
|
"grad_norm": 3.1474342346191406, |
|
"learning_rate": 6.27811808233063e-07, |
|
"loss": 0.5147, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.4304070540679226, |
|
"grad_norm": 3.5564653873443604, |
|
"learning_rate": 6.261883337252808e-07, |
|
"loss": 0.5062, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.43145427317757695, |
|
"grad_norm": 2.47421932220459, |
|
"learning_rate": 6.245634371051808e-07, |
|
"loss": 0.5364, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.4325014922872313, |
|
"grad_norm": 1.5858722925186157, |
|
"learning_rate": 6.22937136684959e-07, |
|
"loss": 0.5319, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.43354871139688556, |
|
"grad_norm": 2.9193403720855713, |
|
"learning_rate": 6.21309450792632e-07, |
|
"loss": 0.486, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.4345959305065399, |
|
"grad_norm": 1.9017012119293213, |
|
"learning_rate": 6.1968039777183e-07, |
|
"loss": 0.5445, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.4356431496161942, |
|
"grad_norm": 2.5207788944244385, |
|
"learning_rate": 6.180499959815908e-07, |
|
"loss": 0.5274, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.4366903687258485, |
|
"grad_norm": 2.239696979522705, |
|
"learning_rate": 6.164182637961521e-07, |
|
"loss": 0.5056, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.4377375878355028, |
|
"grad_norm": 2.565997838973999, |
|
"learning_rate": 6.147852196047455e-07, |
|
"loss": 0.508, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.43878480694515715, |
|
"grad_norm": 1.4207922220230103, |
|
"learning_rate": 6.131508818113878e-07, |
|
"loss": 0.4964, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.4398320260548115, |
|
"grad_norm": 2.6042516231536865, |
|
"learning_rate": 6.11515268834675e-07, |
|
"loss": 0.5008, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.44087924516446575, |
|
"grad_norm": 2.077496290206909, |
|
"learning_rate": 6.098783991075736e-07, |
|
"loss": 0.4964, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.4419264642741201, |
|
"grad_norm": 2.444882392883301, |
|
"learning_rate": 6.082402910772137e-07, |
|
"loss": 0.493, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.4429736833837744, |
|
"grad_norm": 3.973526954650879, |
|
"learning_rate": 6.066009632046809e-07, |
|
"loss": 0.5078, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.4440209024934287, |
|
"grad_norm": 2.283217430114746, |
|
"learning_rate": 6.049604339648078e-07, |
|
"loss": 0.4756, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.445068121603083, |
|
"grad_norm": 1.3749598264694214, |
|
"learning_rate": 6.033187218459665e-07, |
|
"loss": 0.494, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.44611534071273734, |
|
"grad_norm": 3.739201068878174, |
|
"learning_rate": 6.016758453498592e-07, |
|
"loss": 0.4977, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.4471625598223916, |
|
"grad_norm": 2.5676069259643555, |
|
"learning_rate": 6.00031822991311e-07, |
|
"loss": 0.4691, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.44820977893204594, |
|
"grad_norm": 2.269869089126587, |
|
"learning_rate": 5.983866732980607e-07, |
|
"loss": 0.5088, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.4492569980417003, |
|
"grad_norm": 1.8404080867767334, |
|
"learning_rate": 5.96740414810551e-07, |
|
"loss": 0.4666, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.4503042171513546, |
|
"grad_norm": 2.3597822189331055, |
|
"learning_rate": 5.950930660817214e-07, |
|
"loss": 0.4976, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.4513514362610089, |
|
"grad_norm": 1.5849223136901855, |
|
"learning_rate": 5.934446456767977e-07, |
|
"loss": 0.5176, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.4523986553706632, |
|
"grad_norm": 1.3389567136764526, |
|
"learning_rate": 5.917951721730834e-07, |
|
"loss": 0.5244, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.45344587448031753, |
|
"grad_norm": 2.6399717330932617, |
|
"learning_rate": 5.901446641597498e-07, |
|
"loss": 0.5227, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.4544930935899718, |
|
"grad_norm": 2.2782344818115234, |
|
"learning_rate": 5.884931402376274e-07, |
|
"loss": 0.5351, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.45554031269962614, |
|
"grad_norm": 4.411149024963379, |
|
"learning_rate": 5.868406190189955e-07, |
|
"loss": 0.4855, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.45658753180928047, |
|
"grad_norm": 2.243643045425415, |
|
"learning_rate": 5.851871191273726e-07, |
|
"loss": 0.5299, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.4576347509189348, |
|
"grad_norm": 2.678518533706665, |
|
"learning_rate": 5.835326591973068e-07, |
|
"loss": 0.5615, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.45868197002858907, |
|
"grad_norm": 2.2850341796875, |
|
"learning_rate": 5.818772578741654e-07, |
|
"loss": 0.5314, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.4597291891382434, |
|
"grad_norm": 2.199620246887207, |
|
"learning_rate": 5.802209338139253e-07, |
|
"loss": 0.4905, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.46077640824789773, |
|
"grad_norm": 2.532054901123047, |
|
"learning_rate": 5.785637056829619e-07, |
|
"loss": 0.5143, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.461823627357552, |
|
"grad_norm": 1.9873905181884766, |
|
"learning_rate": 5.769055921578399e-07, |
|
"loss": 0.5128, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.46287084646720633, |
|
"grad_norm": 2.033123254776001, |
|
"learning_rate": 5.752466119251018e-07, |
|
"loss": 0.5027, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.46391806557686066, |
|
"grad_norm": 1.890243649482727, |
|
"learning_rate": 5.735867836810575e-07, |
|
"loss": 0.4893, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.46496528468651493, |
|
"grad_norm": 2.7789084911346436, |
|
"learning_rate": 5.719261261315742e-07, |
|
"loss": 0.4804, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.46601250379616926, |
|
"grad_norm": 2.320241928100586, |
|
"learning_rate": 5.702646579918651e-07, |
|
"loss": 0.4727, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.4670597229058236, |
|
"grad_norm": 2.557783603668213, |
|
"learning_rate": 5.686023979862784e-07, |
|
"loss": 0.4802, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.4681069420154779, |
|
"grad_norm": 2.0354034900665283, |
|
"learning_rate": 5.669393648480861e-07, |
|
"loss": 0.4409, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.4691541611251322, |
|
"grad_norm": 2.6490516662597656, |
|
"learning_rate": 5.652755773192742e-07, |
|
"loss": 0.5116, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.4702013802347865, |
|
"grad_norm": 1.9367735385894775, |
|
"learning_rate": 5.636110541503299e-07, |
|
"loss": 0.51, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.47124859934444085, |
|
"grad_norm": 2.3540682792663574, |
|
"learning_rate": 5.619458141000305e-07, |
|
"loss": 0.5053, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.47229581845409513, |
|
"grad_norm": 2.308772325515747, |
|
"learning_rate": 5.602798759352328e-07, |
|
"loss": 0.4857, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.47334303756374946, |
|
"grad_norm": 2.775662899017334, |
|
"learning_rate": 5.586132584306617e-07, |
|
"loss": 0.5039, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.4743902566734038, |
|
"grad_norm": 2.4968132972717285, |
|
"learning_rate": 5.569459803686971e-07, |
|
"loss": 0.5047, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.4754374757830581, |
|
"grad_norm": 2.3723912239074707, |
|
"learning_rate": 5.552780605391637e-07, |
|
"loss": 0.5022, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.4764846948927124, |
|
"grad_norm": 2.080238103866577, |
|
"learning_rate": 5.53609517739119e-07, |
|
"loss": 0.5139, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.4775319140023667, |
|
"grad_norm": 2.763566732406616, |
|
"learning_rate": 5.519403707726409e-07, |
|
"loss": 0.5269, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.47857913311202105, |
|
"grad_norm": 2.2503960132598877, |
|
"learning_rate": 5.502706384506162e-07, |
|
"loss": 0.5049, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.4796263522216753, |
|
"grad_norm": 2.2146077156066895, |
|
"learning_rate": 5.486003395905284e-07, |
|
"loss": 0.5164, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.48067357133132965, |
|
"grad_norm": 2.077916145324707, |
|
"learning_rate": 5.46929493016246e-07, |
|
"loss": 0.5436, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.481720790440984, |
|
"grad_norm": 2.990812301635742, |
|
"learning_rate": 5.452581175578099e-07, |
|
"loss": 0.4996, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.48276800955063826, |
|
"grad_norm": 2.3420207500457764, |
|
"learning_rate": 5.435862320512216e-07, |
|
"loss": 0.4886, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.4838152286602926, |
|
"grad_norm": 2.182870864868164, |
|
"learning_rate": 5.419138553382303e-07, |
|
"loss": 0.5081, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.4848624477699469, |
|
"grad_norm": 2.5916247367858887, |
|
"learning_rate": 5.402410062661217e-07, |
|
"loss": 0.4863, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.48590966687960124, |
|
"grad_norm": 2.3160765171051025, |
|
"learning_rate": 5.38567703687504e-07, |
|
"loss": 0.55, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.4869568859892555, |
|
"grad_norm": 3.3683152198791504, |
|
"learning_rate": 5.368939664600971e-07, |
|
"loss": 0.4838, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.48800410509890985, |
|
"grad_norm": 1.8857132196426392, |
|
"learning_rate": 5.352198134465188e-07, |
|
"loss": 0.5053, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.4890513242085642, |
|
"grad_norm": 2.4393274784088135, |
|
"learning_rate": 5.335452635140728e-07, |
|
"loss": 0.53, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.49009854331821845, |
|
"grad_norm": 2.8095269203186035, |
|
"learning_rate": 5.318703355345361e-07, |
|
"loss": 0.4955, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.4911457624278728, |
|
"grad_norm": 3.766524076461792, |
|
"learning_rate": 5.301950483839461e-07, |
|
"loss": 0.5033, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.4921929815375271, |
|
"grad_norm": 3.614816665649414, |
|
"learning_rate": 5.285194209423881e-07, |
|
"loss": 0.516, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.49324020064718144, |
|
"grad_norm": 2.2229409217834473, |
|
"learning_rate": 5.268434720937823e-07, |
|
"loss": 0.5158, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.4942874197568357, |
|
"grad_norm": 2.4111645221710205, |
|
"learning_rate": 5.251672207256708e-07, |
|
"loss": 0.5265, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.49533463886649004, |
|
"grad_norm": 1.9818792343139648, |
|
"learning_rate": 5.234906857290057e-07, |
|
"loss": 0.5059, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.49638185797614437, |
|
"grad_norm": 1.8921643495559692, |
|
"learning_rate": 5.218138859979349e-07, |
|
"loss": 0.5281, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.49742907708579864, |
|
"grad_norm": 2.3685996532440186, |
|
"learning_rate": 5.201368404295899e-07, |
|
"loss": 0.5257, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.498476296195453, |
|
"grad_norm": 3.2099828720092773, |
|
"learning_rate": 5.184595679238732e-07, |
|
"loss": 0.4806, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.4995235153051073, |
|
"grad_norm": 2.328226089477539, |
|
"learning_rate": 5.167820873832445e-07, |
|
"loss": 0.5496, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.5005707344147616, |
|
"grad_norm": 2.010138988494873, |
|
"learning_rate": 5.151044177125077e-07, |
|
"loss": 0.5025, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.501617953524416, |
|
"grad_norm": 2.0107200145721436, |
|
"learning_rate": 5.134265778185984e-07, |
|
"loss": 0.4695, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.5026651726340702, |
|
"grad_norm": 3.73002552986145, |
|
"learning_rate": 5.117485866103707e-07, |
|
"loss": 0.5489, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.5037123917437245, |
|
"grad_norm": 1.203131914138794, |
|
"learning_rate": 5.100704629983842e-07, |
|
"loss": 0.4918, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.5047596108533788, |
|
"grad_norm": 2.464951276779175, |
|
"learning_rate": 5.083922258946899e-07, |
|
"loss": 0.526, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.5058068299630332, |
|
"grad_norm": 2.5923502445220947, |
|
"learning_rate": 5.067138942126185e-07, |
|
"loss": 0.5094, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.5068540490726875, |
|
"grad_norm": 2.553731918334961, |
|
"learning_rate": 5.050354868665663e-07, |
|
"loss": 0.5116, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.5079012681823418, |
|
"grad_norm": 2.171161413192749, |
|
"learning_rate": 5.033570227717823e-07, |
|
"loss": 0.5021, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.5089484872919962, |
|
"grad_norm": 1.9675207138061523, |
|
"learning_rate": 5.016785208441553e-07, |
|
"loss": 0.4759, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.5099957064016504, |
|
"grad_norm": 2.772975206375122, |
|
"learning_rate": 5e-07, |
|
"loss": 0.504, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.5110429255113047, |
|
"grad_norm": 1.8081309795379639, |
|
"learning_rate": 4.983214791558449e-07, |
|
"loss": 0.4884, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.512090144620959, |
|
"grad_norm": 2.1011574268341064, |
|
"learning_rate": 4.966429772282177e-07, |
|
"loss": 0.5411, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.5131373637306134, |
|
"grad_norm": 1.7532665729522705, |
|
"learning_rate": 4.949645131334338e-07, |
|
"loss": 0.5217, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.5141845828402677, |
|
"grad_norm": 1.9248243570327759, |
|
"learning_rate": 4.932861057873817e-07, |
|
"loss": 0.5161, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.515231801949922, |
|
"grad_norm": 2.180882692337036, |
|
"learning_rate": 4.916077741053101e-07, |
|
"loss": 0.4977, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.5162790210595763, |
|
"grad_norm": 2.663121223449707, |
|
"learning_rate": 4.899295370016159e-07, |
|
"loss": 0.4918, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.5173262401692306, |
|
"grad_norm": 1.928085446357727, |
|
"learning_rate": 4.882514133896293e-07, |
|
"loss": 0.4863, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.5183734592788849, |
|
"grad_norm": 2.9963412284851074, |
|
"learning_rate": 4.865734221814016e-07, |
|
"loss": 0.5015, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.5194206783885392, |
|
"grad_norm": 2.45681095123291, |
|
"learning_rate": 4.848955822874924e-07, |
|
"loss": 0.5285, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.5204678974981936, |
|
"grad_norm": 1.8462231159210205, |
|
"learning_rate": 4.832179126167556e-07, |
|
"loss": 0.467, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.5215151166078479, |
|
"grad_norm": 2.27242374420166, |
|
"learning_rate": 4.815404320761267e-07, |
|
"loss": 0.4681, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.5225623357175022, |
|
"grad_norm": 2.18723201751709, |
|
"learning_rate": 4.7986315957041e-07, |
|
"loss": 0.5005, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.5236095548271564, |
|
"grad_norm": 3.0114426612854004, |
|
"learning_rate": 4.781861140020652e-07, |
|
"loss": 0.4861, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.5246567739368108, |
|
"grad_norm": 2.07069730758667, |
|
"learning_rate": 4.765093142709943e-07, |
|
"loss": 0.4648, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.5257039930464651, |
|
"grad_norm": 2.2993671894073486, |
|
"learning_rate": 4.7483277927432924e-07, |
|
"loss": 0.4835, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.5267512121561194, |
|
"grad_norm": 2.224874258041382, |
|
"learning_rate": 4.731565279062179e-07, |
|
"loss": 0.4642, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.5277984312657737, |
|
"grad_norm": 1.7376128435134888, |
|
"learning_rate": 4.7148057905761187e-07, |
|
"loss": 0.4883, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.5288456503754281, |
|
"grad_norm": 3.3602840900421143, |
|
"learning_rate": 4.698049516160539e-07, |
|
"loss": 0.4762, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.5298928694850824, |
|
"grad_norm": 1.7802869081497192, |
|
"learning_rate": 4.681296644654639e-07, |
|
"loss": 0.5264, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.5309400885947366, |
|
"grad_norm": 1.8603919744491577, |
|
"learning_rate": 4.6645473648592716e-07, |
|
"loss": 0.4902, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.531987307704391, |
|
"grad_norm": 2.204157590866089, |
|
"learning_rate": 4.647801865534813e-07, |
|
"loss": 0.4835, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.5330345268140453, |
|
"grad_norm": 1.2694624662399292, |
|
"learning_rate": 4.63106033539903e-07, |
|
"loss": 0.5238, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.5340817459236996, |
|
"grad_norm": 2.0624773502349854, |
|
"learning_rate": 4.6143229631249596e-07, |
|
"loss": 0.5033, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.5351289650333539, |
|
"grad_norm": 1.9012243747711182, |
|
"learning_rate": 4.597589937338784e-07, |
|
"loss": 0.5076, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.5361761841430083, |
|
"grad_norm": 2.1069536209106445, |
|
"learning_rate": 4.580861446617698e-07, |
|
"loss": 0.5171, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.5372234032526626, |
|
"grad_norm": 1.5368138551712036, |
|
"learning_rate": 4.564137679487785e-07, |
|
"loss": 0.4803, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.5382706223623168, |
|
"grad_norm": 1.5406559705734253, |
|
"learning_rate": 4.5474188244219006e-07, |
|
"loss": 0.4839, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.5393178414719711, |
|
"grad_norm": 1.4071673154830933, |
|
"learning_rate": 4.530705069837542e-07, |
|
"loss": 0.4764, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.5403650605816255, |
|
"grad_norm": 2.699596643447876, |
|
"learning_rate": 4.513996604094716e-07, |
|
"loss": 0.5177, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.5414122796912798, |
|
"grad_norm": 1.542262315750122, |
|
"learning_rate": 4.497293615493838e-07, |
|
"loss": 0.508, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.5424594988009341, |
|
"grad_norm": 3.0482521057128906, |
|
"learning_rate": 4.480596292273592e-07, |
|
"loss": 0.5303, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.5435067179105885, |
|
"grad_norm": 2.214055061340332, |
|
"learning_rate": 4.463904822608809e-07, |
|
"loss": 0.4843, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.5445539370202428, |
|
"grad_norm": 2.4003210067749023, |
|
"learning_rate": 4.4472193946083634e-07, |
|
"loss": 0.5024, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.545601156129897, |
|
"grad_norm": 2.2942888736724854, |
|
"learning_rate": 4.430540196313031e-07, |
|
"loss": 0.5073, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.5466483752395513, |
|
"grad_norm": 2.4813528060913086, |
|
"learning_rate": 4.413867415693383e-07, |
|
"loss": 0.5114, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.5476955943492057, |
|
"grad_norm": 1.8171602487564087, |
|
"learning_rate": 4.3972012406476715e-07, |
|
"loss": 0.4714, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.54874281345886, |
|
"grad_norm": 2.677717924118042, |
|
"learning_rate": 4.3805418589996967e-07, |
|
"loss": 0.5277, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.5497900325685143, |
|
"grad_norm": 2.815244674682617, |
|
"learning_rate": 4.363889458496701e-07, |
|
"loss": 0.4969, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.5508372516781687, |
|
"grad_norm": 2.719905376434326, |
|
"learning_rate": 4.347244226807257e-07, |
|
"loss": 0.494, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.551884470787823, |
|
"grad_norm": 2.277196168899536, |
|
"learning_rate": 4.3306063515191384e-07, |
|
"loss": 0.4989, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.5529316898974772, |
|
"grad_norm": 2.747807741165161, |
|
"learning_rate": 4.3139760201372166e-07, |
|
"loss": 0.475, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.5539789090071315, |
|
"grad_norm": 2.1879899501800537, |
|
"learning_rate": 4.29735342008135e-07, |
|
"loss": 0.4727, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.5550261281167859, |
|
"grad_norm": 1.5891708135604858, |
|
"learning_rate": 4.280738738684259e-07, |
|
"loss": 0.5209, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.5560733472264402, |
|
"grad_norm": 2.6258082389831543, |
|
"learning_rate": 4.2641321631894256e-07, |
|
"loss": 0.5146, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.5571205663360945, |
|
"grad_norm": 2.106497287750244, |
|
"learning_rate": 4.2475338807489825e-07, |
|
"loss": 0.5072, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.5581677854457489, |
|
"grad_norm": 1.3520596027374268, |
|
"learning_rate": 4.2309440784216014e-07, |
|
"loss": 0.5007, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.5592150045554031, |
|
"grad_norm": 2.2585766315460205, |
|
"learning_rate": 4.21436294317038e-07, |
|
"loss": 0.5661, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.5602622236650574, |
|
"grad_norm": 2.4655063152313232, |
|
"learning_rate": 4.1977906618607473e-07, |
|
"loss": 0.5057, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.5613094427747117, |
|
"grad_norm": 1.7120404243469238, |
|
"learning_rate": 4.181227421258344e-07, |
|
"loss": 0.4762, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.5623566618843661, |
|
"grad_norm": 2.365668535232544, |
|
"learning_rate": 4.164673408026932e-07, |
|
"loss": 0.5015, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.5634038809940204, |
|
"grad_norm": 2.5297205448150635, |
|
"learning_rate": 4.148128808726274e-07, |
|
"loss": 0.4789, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.5644511001036747, |
|
"grad_norm": 2.997265577316284, |
|
"learning_rate": 4.131593809810044e-07, |
|
"loss": 0.4841, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.565498319213329, |
|
"grad_norm": 2.2408447265625, |
|
"learning_rate": 4.1150685976237253e-07, |
|
"loss": 0.5194, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.5665455383229833, |
|
"grad_norm": 1.8267594575881958, |
|
"learning_rate": 4.098553358402503e-07, |
|
"loss": 0.4978, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.5675927574326376, |
|
"grad_norm": 3.2854866981506348, |
|
"learning_rate": 4.0820482782691666e-07, |
|
"loss": 0.499, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.5686399765422919, |
|
"grad_norm": 2.401383638381958, |
|
"learning_rate": 4.0655535432320225e-07, |
|
"loss": 0.539, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.5696871956519463, |
|
"grad_norm": 2.3308005332946777, |
|
"learning_rate": 4.0490693391827867e-07, |
|
"loss": 0.527, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.5707344147616006, |
|
"grad_norm": 2.6808366775512695, |
|
"learning_rate": 4.0325958518944893e-07, |
|
"loss": 0.4965, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.5717816338712549, |
|
"grad_norm": 2.82200026512146, |
|
"learning_rate": 4.016133267019394e-07, |
|
"loss": 0.5051, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.5728288529809092, |
|
"grad_norm": 3.023541212081909, |
|
"learning_rate": 3.99968177008689e-07, |
|
"loss": 0.4623, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.5738760720905635, |
|
"grad_norm": 2.405120372772217, |
|
"learning_rate": 3.983241546501408e-07, |
|
"loss": 0.5096, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.5749232912002178, |
|
"grad_norm": 1.9728878736495972, |
|
"learning_rate": 3.9668127815403353e-07, |
|
"loss": 0.5405, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.5759705103098721, |
|
"grad_norm": 3.312455415725708, |
|
"learning_rate": 3.950395660351922e-07, |
|
"loss": 0.5245, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.5770177294195264, |
|
"grad_norm": 1.9875174760818481, |
|
"learning_rate": 3.93399036795319e-07, |
|
"loss": 0.4863, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.5780649485291808, |
|
"grad_norm": 2.295588731765747, |
|
"learning_rate": 3.917597089227863e-07, |
|
"loss": 0.4868, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.5791121676388351, |
|
"grad_norm": 2.505709409713745, |
|
"learning_rate": 3.901216008924265e-07, |
|
"loss": 0.4955, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.5801593867484894, |
|
"grad_norm": 2.177341938018799, |
|
"learning_rate": 3.88484731165325e-07, |
|
"loss": 0.5103, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.5812066058581437, |
|
"grad_norm": 1.426915168762207, |
|
"learning_rate": 3.868491181886122e-07, |
|
"loss": 0.5235, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.582253824967798, |
|
"grad_norm": 2.258373498916626, |
|
"learning_rate": 3.852147803952545e-07, |
|
"loss": 0.4983, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.5833010440774523, |
|
"grad_norm": 2.660693645477295, |
|
"learning_rate": 3.835817362038477e-07, |
|
"loss": 0.5127, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.5843482631871066, |
|
"grad_norm": 2.2097291946411133, |
|
"learning_rate": 3.8195000401840927e-07, |
|
"loss": 0.5034, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.585395482296761, |
|
"grad_norm": 2.2298669815063477, |
|
"learning_rate": 3.803196022281701e-07, |
|
"loss": 0.4971, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.5864427014064153, |
|
"grad_norm": 2.1946804523468018, |
|
"learning_rate": 3.78690549207368e-07, |
|
"loss": 0.4942, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.5874899205160696, |
|
"grad_norm": 3.2329068183898926, |
|
"learning_rate": 3.77062863315041e-07, |
|
"loss": 0.513, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.5885371396257238, |
|
"grad_norm": 1.839722752571106, |
|
"learning_rate": 3.7543656289481927e-07, |
|
"loss": 0.5546, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.5895843587353782, |
|
"grad_norm": 2.5834665298461914, |
|
"learning_rate": 3.7381166627471914e-07, |
|
"loss": 0.4821, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.5906315778450325, |
|
"grad_norm": 2.00166916847229, |
|
"learning_rate": 3.7218819176693693e-07, |
|
"loss": 0.5187, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.5916787969546868, |
|
"grad_norm": 3.0043110847473145, |
|
"learning_rate": 3.7056615766764174e-07, |
|
"loss": 0.5227, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.5927260160643412, |
|
"grad_norm": 1.637872576713562, |
|
"learning_rate": 3.6894558225676924e-07, |
|
"loss": 0.4611, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.5937732351739955, |
|
"grad_norm": 2.64483904838562, |
|
"learning_rate": 3.6732648379781683e-07, |
|
"loss": 0.4792, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.5948204542836497, |
|
"grad_norm": 1.7451013326644897, |
|
"learning_rate": 3.657088805376366e-07, |
|
"loss": 0.5322, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.595867673393304, |
|
"grad_norm": 2.465116500854492, |
|
"learning_rate": 3.640927907062297e-07, |
|
"loss": 0.4657, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.5969148925029584, |
|
"grad_norm": 3.788491725921631, |
|
"learning_rate": 3.624782325165421e-07, |
|
"loss": 0.4855, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.5979621116126127, |
|
"grad_norm": 2.519657850265503, |
|
"learning_rate": 3.6086522416425823e-07, |
|
"loss": 0.5125, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.599009330722267, |
|
"grad_norm": 1.8677030801773071, |
|
"learning_rate": 3.5925378382759577e-07, |
|
"loss": 0.498, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.6000565498319214, |
|
"grad_norm": 1.9577298164367676, |
|
"learning_rate": 3.57643929667102e-07, |
|
"loss": 0.4792, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.6011037689415757, |
|
"grad_norm": 2.364872932434082, |
|
"learning_rate": 3.560356798254477e-07, |
|
"loss": 0.4882, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.6021509880512299, |
|
"grad_norm": 2.4925103187561035, |
|
"learning_rate": 3.5442905242722365e-07, |
|
"loss": 0.4825, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.6031982071608842, |
|
"grad_norm": 2.7740890979766846, |
|
"learning_rate": 3.5282406557873635e-07, |
|
"loss": 0.5345, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.6042454262705386, |
|
"grad_norm": 1.0781739950180054, |
|
"learning_rate": 3.512207373678032e-07, |
|
"loss": 0.4665, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.6052926453801929, |
|
"grad_norm": 2.9016547203063965, |
|
"learning_rate": 3.496190858635494e-07, |
|
"loss": 0.4655, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.6063398644898472, |
|
"grad_norm": 0.917265772819519, |
|
"learning_rate": 3.480191291162041e-07, |
|
"loss": 0.4707, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.6073870835995016, |
|
"grad_norm": 1.5372905731201172, |
|
"learning_rate": 3.4642088515689695e-07, |
|
"loss": 0.4867, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.6084343027091559, |
|
"grad_norm": 1.8536443710327148, |
|
"learning_rate": 3.4482437199745463e-07, |
|
"loss": 0.4746, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.6094815218188101, |
|
"grad_norm": 2.8087878227233887, |
|
"learning_rate": 3.432296076301986e-07, |
|
"loss": 0.5529, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.6105287409284644, |
|
"grad_norm": 1.8362385034561157, |
|
"learning_rate": 3.416366100277414e-07, |
|
"loss": 0.4911, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.6115759600381188, |
|
"grad_norm": 1.9666386842727661, |
|
"learning_rate": 3.4004539714278457e-07, |
|
"loss": 0.4902, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.6126231791477731, |
|
"grad_norm": 1.745953917503357, |
|
"learning_rate": 3.3845598690791675e-07, |
|
"loss": 0.5204, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.6136703982574274, |
|
"grad_norm": 1.9354580640792847, |
|
"learning_rate": 3.368683972354108e-07, |
|
"loss": 0.4763, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.6147176173670817, |
|
"grad_norm": 2.232057809829712, |
|
"learning_rate": 3.3528264601702217e-07, |
|
"loss": 0.5116, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.6157648364767361, |
|
"grad_norm": 2.1513118743896484, |
|
"learning_rate": 3.336987511237877e-07, |
|
"loss": 0.539, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.6168120555863903, |
|
"grad_norm": 1.7164148092269897, |
|
"learning_rate": 3.321167304058238e-07, |
|
"loss": 0.4912, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.6178592746960446, |
|
"grad_norm": 2.390707015991211, |
|
"learning_rate": 3.305366016921249e-07, |
|
"loss": 0.5207, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.618906493805699, |
|
"grad_norm": 1.944360613822937, |
|
"learning_rate": 3.289583827903639e-07, |
|
"loss": 0.4786, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.6199537129153533, |
|
"grad_norm": 3.611234426498413, |
|
"learning_rate": 3.2738209148668996e-07, |
|
"loss": 0.5597, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.6210009320250076, |
|
"grad_norm": 2.125988245010376, |
|
"learning_rate": 3.2580774554552834e-07, |
|
"loss": 0.5064, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.6220481511346619, |
|
"grad_norm": 2.2751822471618652, |
|
"learning_rate": 3.242353627093817e-07, |
|
"loss": 0.4839, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.6230953702443163, |
|
"grad_norm": 2.4632444381713867, |
|
"learning_rate": 3.226649606986277e-07, |
|
"loss": 0.5085, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.6241425893539705, |
|
"grad_norm": 2.596140146255493, |
|
"learning_rate": 3.210965572113211e-07, |
|
"loss": 0.4834, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.6251898084636248, |
|
"grad_norm": 3.1402766704559326, |
|
"learning_rate": 3.195301699229943e-07, |
|
"loss": 0.4894, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.6262370275732791, |
|
"grad_norm": 1.3100465536117554, |
|
"learning_rate": 3.179658164864567e-07, |
|
"loss": 0.5371, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.6272842466829335, |
|
"grad_norm": 2.2746660709381104, |
|
"learning_rate": 3.164035145315971e-07, |
|
"loss": 0.4865, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.6283314657925878, |
|
"grad_norm": 2.2843546867370605, |
|
"learning_rate": 3.14843281665185e-07, |
|
"loss": 0.4958, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.6293786849022421, |
|
"grad_norm": 2.045327663421631, |
|
"learning_rate": 3.132851354706709e-07, |
|
"loss": 0.4747, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.6304259040118964, |
|
"grad_norm": 2.59464430809021, |
|
"learning_rate": 3.117290935079895e-07, |
|
"loss": 0.4927, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.6314731231215507, |
|
"grad_norm": 1.8439029455184937, |
|
"learning_rate": 3.1017517331336175e-07, |
|
"loss": 0.4829, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.632520342231205, |
|
"grad_norm": 2.155336618423462, |
|
"learning_rate": 3.0862339239909587e-07, |
|
"loss": 0.4764, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.6335675613408593, |
|
"grad_norm": 2.2298882007598877, |
|
"learning_rate": 3.070737682533913e-07, |
|
"loss": 0.5267, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.6346147804505137, |
|
"grad_norm": 1.9075183868408203, |
|
"learning_rate": 3.0552631834014153e-07, |
|
"loss": 0.5101, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.635661999560168, |
|
"grad_norm": 2.1493678092956543, |
|
"learning_rate": 3.039810600987367e-07, |
|
"loss": 0.455, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.6367092186698223, |
|
"grad_norm": 1.9552183151245117, |
|
"learning_rate": 3.024380109438669e-07, |
|
"loss": 0.511, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.6377564377794765, |
|
"grad_norm": 2.0828135013580322, |
|
"learning_rate": 3.0089718826532727e-07, |
|
"loss": 0.4816, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.6388036568891309, |
|
"grad_norm": 1.6887547969818115, |
|
"learning_rate": 2.9935860942782055e-07, |
|
"loss": 0.4874, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.6398508759987852, |
|
"grad_norm": 1.987060785293579, |
|
"learning_rate": 2.978222917707616e-07, |
|
"loss": 0.5237, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.6408980951084395, |
|
"grad_norm": 1.8471943140029907, |
|
"learning_rate": 2.9628825260808313e-07, |
|
"loss": 0.4864, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.6419453142180939, |
|
"grad_norm": 2.424875497817993, |
|
"learning_rate": 2.9475650922803907e-07, |
|
"loss": 0.4865, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.6429925333277482, |
|
"grad_norm": 1.9071121215820312, |
|
"learning_rate": 2.9322707889301066e-07, |
|
"loss": 0.5097, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.6440397524374025, |
|
"grad_norm": 1.9200624227523804, |
|
"learning_rate": 2.9169997883931205e-07, |
|
"loss": 0.4865, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.6450869715470567, |
|
"grad_norm": 1.8281010389328003, |
|
"learning_rate": 2.90175226276995e-07, |
|
"loss": 0.4923, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.6461341906567111, |
|
"grad_norm": 2.7019853591918945, |
|
"learning_rate": 2.886528383896559e-07, |
|
"loss": 0.4702, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.6471814097663654, |
|
"grad_norm": 1.542846918106079, |
|
"learning_rate": 2.87132832334242e-07, |
|
"loss": 0.5025, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.6482286288760197, |
|
"grad_norm": 3.2872512340545654, |
|
"learning_rate": 2.856152252408578e-07, |
|
"loss": 0.4896, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.6492758479856741, |
|
"grad_norm": 3.8048501014709473, |
|
"learning_rate": 2.841000342125719e-07, |
|
"loss": 0.4723, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.6503230670953284, |
|
"grad_norm": 2.0907108783721924, |
|
"learning_rate": 2.825872763252245e-07, |
|
"loss": 0.5326, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.6513702862049827, |
|
"grad_norm": 2.4722342491149902, |
|
"learning_rate": 2.81076968627235e-07, |
|
"loss": 0.4774, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.6524175053146369, |
|
"grad_norm": 2.449239492416382, |
|
"learning_rate": 2.7956912813940947e-07, |
|
"loss": 0.47, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.6534647244242913, |
|
"grad_norm": 2.0104002952575684, |
|
"learning_rate": 2.7806377185474953e-07, |
|
"loss": 0.5017, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.6545119435339456, |
|
"grad_norm": 2.3968191146850586, |
|
"learning_rate": 2.765609167382602e-07, |
|
"loss": 0.489, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.6555591626435999, |
|
"grad_norm": 2.0325634479522705, |
|
"learning_rate": 2.750605797267587e-07, |
|
"loss": 0.5153, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.6566063817532543, |
|
"grad_norm": 2.9563980102539062, |
|
"learning_rate": 2.7356277772868427e-07, |
|
"loss": 0.5121, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.6576536008629086, |
|
"grad_norm": 1.5260460376739502, |
|
"learning_rate": 2.7206752762390684e-07, |
|
"loss": 0.5009, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.6587008199725629, |
|
"grad_norm": 2.651346206665039, |
|
"learning_rate": 2.7057484626353717e-07, |
|
"loss": 0.4819, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.6597480390822171, |
|
"grad_norm": 2.392993927001953, |
|
"learning_rate": 2.69084750469737e-07, |
|
"loss": 0.4924, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.6607952581918715, |
|
"grad_norm": 2.065648078918457, |
|
"learning_rate": 2.6759725703552916e-07, |
|
"loss": 0.4576, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.6618424773015258, |
|
"grad_norm": 1.6166179180145264, |
|
"learning_rate": 2.661123827246088e-07, |
|
"loss": 0.5187, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.6628896964111801, |
|
"grad_norm": 2.0667145252227783, |
|
"learning_rate": 2.646301442711538e-07, |
|
"loss": 0.4963, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.6639369155208344, |
|
"grad_norm": 3.5013437271118164, |
|
"learning_rate": 2.6315055837963687e-07, |
|
"loss": 0.5027, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.6649841346304888, |
|
"grad_norm": 0.9413002133369446, |
|
"learning_rate": 2.616736417246368e-07, |
|
"loss": 0.4712, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.666031353740143, |
|
"grad_norm": 1.4072952270507812, |
|
"learning_rate": 2.601994109506508e-07, |
|
"loss": 0.4731, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.6670785728497973, |
|
"grad_norm": 2.4212138652801514, |
|
"learning_rate": 2.587278826719069e-07, |
|
"loss": 0.4828, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.6681257919594517, |
|
"grad_norm": 1.7635606527328491, |
|
"learning_rate": 2.5725907347217655e-07, |
|
"loss": 0.4863, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.669173011069106, |
|
"grad_norm": 2.0671000480651855, |
|
"learning_rate": 2.5579299990458785e-07, |
|
"loss": 0.4636, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.6702202301787603, |
|
"grad_norm": 2.378913402557373, |
|
"learning_rate": 2.5432967849143906e-07, |
|
"loss": 0.4766, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.6712674492884146, |
|
"grad_norm": 3.7450199127197266, |
|
"learning_rate": 2.528691257240122e-07, |
|
"loss": 0.5137, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.672314668398069, |
|
"grad_norm": 2.676037073135376, |
|
"learning_rate": 2.514113580623873e-07, |
|
"loss": 0.4933, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.6733618875077232, |
|
"grad_norm": 1.6275851726531982, |
|
"learning_rate": 2.499563919352572e-07, |
|
"loss": 0.5038, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.6744091066173775, |
|
"grad_norm": 2.475569009780884, |
|
"learning_rate": 2.485042437397418e-07, |
|
"loss": 0.4518, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.6754563257270318, |
|
"grad_norm": 3.2226366996765137, |
|
"learning_rate": 2.470549298412036e-07, |
|
"loss": 0.4634, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.6765035448366862, |
|
"grad_norm": 2.9092655181884766, |
|
"learning_rate": 2.456084665730634e-07, |
|
"loss": 0.4851, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.6775507639463405, |
|
"grad_norm": 1.9740290641784668, |
|
"learning_rate": 2.441648702366161e-07, |
|
"loss": 0.489, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.6785979830559948, |
|
"grad_norm": 2.2705118656158447, |
|
"learning_rate": 2.42724157100847e-07, |
|
"loss": 0.4918, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.6796452021656492, |
|
"grad_norm": 2.0279767513275146, |
|
"learning_rate": 2.4128634340224767e-07, |
|
"loss": 0.5309, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.6806924212753034, |
|
"grad_norm": 2.4952125549316406, |
|
"learning_rate": 2.3985144534463507e-07, |
|
"loss": 0.5253, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.6817396403849577, |
|
"grad_norm": 1.7526471614837646, |
|
"learning_rate": 2.3841947909896675e-07, |
|
"loss": 0.4919, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.682786859494612, |
|
"grad_norm": 2.78068208694458, |
|
"learning_rate": 2.369904608031591e-07, |
|
"loss": 0.4678, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.6838340786042664, |
|
"grad_norm": 1.9609248638153076, |
|
"learning_rate": 2.3556440656190675e-07, |
|
"loss": 0.5004, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.6848812977139207, |
|
"grad_norm": 1.8966784477233887, |
|
"learning_rate": 2.3414133244649965e-07, |
|
"loss": 0.4609, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.685928516823575, |
|
"grad_norm": 1.7883254289627075, |
|
"learning_rate": 2.3272125449464197e-07, |
|
"loss": 0.5053, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.6869757359332294, |
|
"grad_norm": 2.0737862586975098, |
|
"learning_rate": 2.3130418871027285e-07, |
|
"loss": 0.5126, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.6880229550428836, |
|
"grad_norm": 2.2858548164367676, |
|
"learning_rate": 2.2989015106338456e-07, |
|
"loss": 0.4954, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.6890701741525379, |
|
"grad_norm": 2.121546506881714, |
|
"learning_rate": 2.284791574898423e-07, |
|
"loss": 0.5017, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.6901173932621922, |
|
"grad_norm": 1.6191834211349487, |
|
"learning_rate": 2.270712238912067e-07, |
|
"loss": 0.4721, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.6911646123718466, |
|
"grad_norm": 2.482290506362915, |
|
"learning_rate": 2.2566636613455185e-07, |
|
"loss": 0.5003, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.6922118314815009, |
|
"grad_norm": 2.413865089416504, |
|
"learning_rate": 2.242646000522885e-07, |
|
"loss": 0.4864, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.6932590505911552, |
|
"grad_norm": 2.390326738357544, |
|
"learning_rate": 2.228659414419853e-07, |
|
"loss": 0.5155, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.6943062697008096, |
|
"grad_norm": 2.158834457397461, |
|
"learning_rate": 2.2147040606618956e-07, |
|
"loss": 0.4972, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.6953534888104638, |
|
"grad_norm": 2.767620086669922, |
|
"learning_rate": 2.2007800965225087e-07, |
|
"loss": 0.4651, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.6964007079201181, |
|
"grad_norm": 3.050821542739868, |
|
"learning_rate": 2.1868876789214418e-07, |
|
"loss": 0.5146, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.6974479270297724, |
|
"grad_norm": 2.7702839374542236, |
|
"learning_rate": 2.1730269644229104e-07, |
|
"loss": 0.5143, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.6984951461394268, |
|
"grad_norm": 2.543748140335083, |
|
"learning_rate": 2.159198109233849e-07, |
|
"loss": 0.5028, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.6995423652490811, |
|
"grad_norm": 3.739572048187256, |
|
"learning_rate": 2.1454012692021505e-07, |
|
"loss": 0.5471, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.7005895843587354, |
|
"grad_norm": 2.372471809387207, |
|
"learning_rate": 2.131636599814896e-07, |
|
"loss": 0.4978, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.7016368034683896, |
|
"grad_norm": 2.276508092880249, |
|
"learning_rate": 2.1179042561966154e-07, |
|
"loss": 0.5153, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.702684022578044, |
|
"grad_norm": 2.0715689659118652, |
|
"learning_rate": 2.1042043931075342e-07, |
|
"loss": 0.5127, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.7037312416876983, |
|
"grad_norm": 1.9307739734649658, |
|
"learning_rate": 2.0905371649418318e-07, |
|
"loss": 0.4746, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.7047784607973526, |
|
"grad_norm": 2.039501905441284, |
|
"learning_rate": 2.076902725725897e-07, |
|
"loss": 0.4952, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.705825679907007, |
|
"grad_norm": 2.397334575653076, |
|
"learning_rate": 2.063301229116597e-07, |
|
"loss": 0.4728, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.7068728990166613, |
|
"grad_norm": 3.5085904598236084, |
|
"learning_rate": 2.0497328283995425e-07, |
|
"loss": 0.5176, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.7079201181263156, |
|
"grad_norm": 2.772425651550293, |
|
"learning_rate": 2.0361976764873623e-07, |
|
"loss": 0.5159, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.7089673372359698, |
|
"grad_norm": 1.3938500881195068, |
|
"learning_rate": 2.0226959259179794e-07, |
|
"loss": 0.4949, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.7100145563456242, |
|
"grad_norm": 2.1697475910186768, |
|
"learning_rate": 2.0092277288528898e-07, |
|
"loss": 0.466, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.7110617754552785, |
|
"grad_norm": 1.512786865234375, |
|
"learning_rate": 1.995793237075452e-07, |
|
"loss": 0.5185, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.7121089945649328, |
|
"grad_norm": 1.7060164213180542, |
|
"learning_rate": 1.9823926019891724e-07, |
|
"loss": 0.4649, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.7131562136745871, |
|
"grad_norm": 2.2003238201141357, |
|
"learning_rate": 1.9690259746160005e-07, |
|
"loss": 0.4921, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.7142034327842415, |
|
"grad_norm": 2.538870096206665, |
|
"learning_rate": 1.9556935055946277e-07, |
|
"loss": 0.5164, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.7152506518938958, |
|
"grad_norm": 3.6677184104919434, |
|
"learning_rate": 1.9423953451787888e-07, |
|
"loss": 0.5299, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.71629787100355, |
|
"grad_norm": 1.810766339302063, |
|
"learning_rate": 1.929131643235569e-07, |
|
"loss": 0.4917, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.7173450901132044, |
|
"grad_norm": 1.973241925239563, |
|
"learning_rate": 1.9159025492437143e-07, |
|
"loss": 0.4827, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.7183923092228587, |
|
"grad_norm": 2.1515488624572754, |
|
"learning_rate": 1.9027082122919474e-07, |
|
"loss": 0.4748, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.719439528332513, |
|
"grad_norm": 1.521958827972412, |
|
"learning_rate": 1.8895487810772882e-07, |
|
"loss": 0.5087, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.7204867474421673, |
|
"grad_norm": 2.1833043098449707, |
|
"learning_rate": 1.876424403903376e-07, |
|
"loss": 0.4784, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.7215339665518217, |
|
"grad_norm": 2.8621373176574707, |
|
"learning_rate": 1.8633352286788011e-07, |
|
"loss": 0.5077, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.722581185661476, |
|
"grad_norm": 1.9079474210739136, |
|
"learning_rate": 1.8502814029154367e-07, |
|
"loss": 0.5052, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.7236284047711302, |
|
"grad_norm": 2.184054374694824, |
|
"learning_rate": 1.837263073726769e-07, |
|
"loss": 0.5109, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.7246756238807845, |
|
"grad_norm": 2.0883328914642334, |
|
"learning_rate": 1.824280387826258e-07, |
|
"loss": 0.4888, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.7257228429904389, |
|
"grad_norm": 2.368727207183838, |
|
"learning_rate": 1.8113334915256663e-07, |
|
"loss": 0.4963, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.7267700621000932, |
|
"grad_norm": 2.7945289611816406, |
|
"learning_rate": 1.7984225307334106e-07, |
|
"loss": 0.4927, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.7278172812097475, |
|
"grad_norm": 1.937376856803894, |
|
"learning_rate": 1.7855476509529337e-07, |
|
"loss": 0.4741, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.7288645003194019, |
|
"grad_norm": 3.4460761547088623, |
|
"learning_rate": 1.7727089972810505e-07, |
|
"loss": 0.569, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.7299117194290562, |
|
"grad_norm": 3.9340882301330566, |
|
"learning_rate": 1.7599067144063086e-07, |
|
"loss": 0.5028, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.7309589385387104, |
|
"grad_norm": 3.2756307125091553, |
|
"learning_rate": 1.7471409466073772e-07, |
|
"loss": 0.5238, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.7320061576483647, |
|
"grad_norm": 2.0363681316375732, |
|
"learning_rate": 1.7344118377514044e-07, |
|
"loss": 0.5528, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.7330533767580191, |
|
"grad_norm": 2.6508500576019287, |
|
"learning_rate": 1.7217195312923944e-07, |
|
"loss": 0.4733, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.7341005958676734, |
|
"grad_norm": 1.832088828086853, |
|
"learning_rate": 1.7090641702696102e-07, |
|
"loss": 0.4909, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.7351478149773277, |
|
"grad_norm": 2.644780158996582, |
|
"learning_rate": 1.6964458973059358e-07, |
|
"loss": 0.4928, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.7361950340869821, |
|
"grad_norm": 2.407883644104004, |
|
"learning_rate": 1.683864854606289e-07, |
|
"loss": 0.4497, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.7372422531966363, |
|
"grad_norm": 2.3634557723999023, |
|
"learning_rate": 1.6713211839560125e-07, |
|
"loss": 0.4738, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.7382894723062906, |
|
"grad_norm": 2.401092052459717, |
|
"learning_rate": 1.658815026719269e-07, |
|
"loss": 0.5084, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.7393366914159449, |
|
"grad_norm": 2.105447292327881, |
|
"learning_rate": 1.6463465238374568e-07, |
|
"loss": 0.4681, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.7403839105255993, |
|
"grad_norm": 2.5298540592193604, |
|
"learning_rate": 1.633915815827623e-07, |
|
"loss": 0.5149, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.7414311296352536, |
|
"grad_norm": 2.3362057209014893, |
|
"learning_rate": 1.621523042780868e-07, |
|
"loss": 0.5225, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.7424783487449079, |
|
"grad_norm": 3.7627904415130615, |
|
"learning_rate": 1.6091683443607767e-07, |
|
"loss": 0.4967, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.7435255678545623, |
|
"grad_norm": 2.4007790088653564, |
|
"learning_rate": 1.5968518598018483e-07, |
|
"loss": 0.4878, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.7445727869642165, |
|
"grad_norm": 2.1650781631469727, |
|
"learning_rate": 1.5845737279079118e-07, |
|
"loss": 0.502, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.7456200060738708, |
|
"grad_norm": 1.9574668407440186, |
|
"learning_rate": 1.5723340870505753e-07, |
|
"loss": 0.4843, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.7466672251835251, |
|
"grad_norm": 2.2389516830444336, |
|
"learning_rate": 1.5601330751676624e-07, |
|
"loss": 0.519, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.7477144442931795, |
|
"grad_norm": 1.7965580224990845, |
|
"learning_rate": 1.5479708297616567e-07, |
|
"loss": 0.4676, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.7487616634028338, |
|
"grad_norm": 2.057460069656372, |
|
"learning_rate": 1.5358474878981526e-07, |
|
"loss": 0.5106, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.7498088825124881, |
|
"grad_norm": 2.1372034549713135, |
|
"learning_rate": 1.5237631862043115e-07, |
|
"loss": 0.4786, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.7508561016221424, |
|
"grad_norm": 2.0700478553771973, |
|
"learning_rate": 1.5117180608673203e-07, |
|
"loss": 0.4855, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.7519033207317967, |
|
"grad_norm": 1.7832368612289429, |
|
"learning_rate": 1.4997122476328593e-07, |
|
"loss": 0.5188, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.752950539841451, |
|
"grad_norm": 3.6390135288238525, |
|
"learning_rate": 1.4877458818035705e-07, |
|
"loss": 0.5304, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.7539977589511053, |
|
"grad_norm": 3.022871732711792, |
|
"learning_rate": 1.4758190982375295e-07, |
|
"loss": 0.4648, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.7550449780607597, |
|
"grad_norm": 1.6055036783218384, |
|
"learning_rate": 1.463932031346739e-07, |
|
"loss": 0.5118, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.756092197170414, |
|
"grad_norm": 4.166171550750732, |
|
"learning_rate": 1.4520848150955912e-07, |
|
"loss": 0.4986, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.7571394162800683, |
|
"grad_norm": 3.3419265747070312, |
|
"learning_rate": 1.44027758299938e-07, |
|
"loss": 0.5049, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.7581866353897226, |
|
"grad_norm": 3.171034336090088, |
|
"learning_rate": 1.4285104681227854e-07, |
|
"loss": 0.5091, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.7592338544993769, |
|
"grad_norm": 2.6404178142547607, |
|
"learning_rate": 1.4167836030783752e-07, |
|
"loss": 0.5208, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.7602810736090312, |
|
"grad_norm": 2.8442752361297607, |
|
"learning_rate": 1.4050971200251115e-07, |
|
"loss": 0.475, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.7613282927186855, |
|
"grad_norm": 1.9694572687149048, |
|
"learning_rate": 1.3934511506668616e-07, |
|
"loss": 0.4477, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.7623755118283398, |
|
"grad_norm": 3.6044440269470215, |
|
"learning_rate": 1.3818458262509119e-07, |
|
"loss": 0.4972, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.7634227309379942, |
|
"grad_norm": 1.7680317163467407, |
|
"learning_rate": 1.3702812775664917e-07, |
|
"loss": 0.4964, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.7644699500476485, |
|
"grad_norm": 1.948326587677002, |
|
"learning_rate": 1.358757634943296e-07, |
|
"loss": 0.4733, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.7655171691573028, |
|
"grad_norm": 2.4567108154296875, |
|
"learning_rate": 1.3472750282500195e-07, |
|
"loss": 0.5247, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.766564388266957, |
|
"grad_norm": 1.3387149572372437, |
|
"learning_rate": 1.3358335868928906e-07, |
|
"loss": 0.4894, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.7676116073766114, |
|
"grad_norm": 1.793434977531433, |
|
"learning_rate": 1.3244334398142154e-07, |
|
"loss": 0.5103, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.7686588264862657, |
|
"grad_norm": 2.429433822631836, |
|
"learning_rate": 1.3130747154909227e-07, |
|
"loss": 0.5304, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.76970604559592, |
|
"grad_norm": 2.3653488159179688, |
|
"learning_rate": 1.3017575419331173e-07, |
|
"loss": 0.5092, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.7707532647055744, |
|
"grad_norm": 3.5659842491149902, |
|
"learning_rate": 1.2904820466826355e-07, |
|
"loss": 0.4835, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.7718004838152287, |
|
"grad_norm": 2.952862501144409, |
|
"learning_rate": 1.279248356811611e-07, |
|
"loss": 0.5015, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.7728477029248829, |
|
"grad_norm": 2.398303508758545, |
|
"learning_rate": 1.2680565989210385e-07, |
|
"loss": 0.4938, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.7738949220345372, |
|
"grad_norm": 2.317095994949341, |
|
"learning_rate": 1.2569068991393523e-07, |
|
"loss": 0.4617, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.7749421411441916, |
|
"grad_norm": 2.453432559967041, |
|
"learning_rate": 1.2457993831209989e-07, |
|
"loss": 0.5198, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.7759893602538459, |
|
"grad_norm": 1.8672329187393188, |
|
"learning_rate": 1.2347341760450263e-07, |
|
"loss": 0.4742, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.7770365793635002, |
|
"grad_norm": 3.076641798019409, |
|
"learning_rate": 1.223711402613669e-07, |
|
"loss": 0.4928, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.7780837984731546, |
|
"grad_norm": 2.7013864517211914, |
|
"learning_rate": 1.212731187050946e-07, |
|
"loss": 0.4565, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.7791310175828089, |
|
"grad_norm": 3.7489242553710938, |
|
"learning_rate": 1.2017936531012574e-07, |
|
"loss": 0.5017, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.7801782366924631, |
|
"grad_norm": 2.7046327590942383, |
|
"learning_rate": 1.1908989240279938e-07, |
|
"loss": 0.4551, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.7812254558021174, |
|
"grad_norm": 1.9993566274642944, |
|
"learning_rate": 1.1800471226121456e-07, |
|
"loss": 0.4742, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.7822726749117718, |
|
"grad_norm": 2.9598634243011475, |
|
"learning_rate": 1.1692383711509129e-07, |
|
"loss": 0.5121, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.7833198940214261, |
|
"grad_norm": 3.2795605659484863, |
|
"learning_rate": 1.158472791456342e-07, |
|
"loss": 0.5344, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.7843671131310804, |
|
"grad_norm": 1.8576877117156982, |
|
"learning_rate": 1.1477505048539387e-07, |
|
"loss": 0.4924, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.7854143322407348, |
|
"grad_norm": 1.8820946216583252, |
|
"learning_rate": 1.1370716321813029e-07, |
|
"loss": 0.4794, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.7864615513503891, |
|
"grad_norm": 3.3854475021362305, |
|
"learning_rate": 1.1264362937867784e-07, |
|
"loss": 0.4841, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.7875087704600433, |
|
"grad_norm": 3.2768609523773193, |
|
"learning_rate": 1.1158446095280821e-07, |
|
"loss": 0.4802, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.7885559895696976, |
|
"grad_norm": 2.02317476272583, |
|
"learning_rate": 1.1052966987709572e-07, |
|
"loss": 0.4762, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.789603208679352, |
|
"grad_norm": 2.08528208732605, |
|
"learning_rate": 1.0947926803878366e-07, |
|
"loss": 0.5083, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.7906504277890063, |
|
"grad_norm": 2.0258214473724365, |
|
"learning_rate": 1.0843326727564945e-07, |
|
"loss": 0.4927, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.7916976468986606, |
|
"grad_norm": 3.184265375137329, |
|
"learning_rate": 1.0739167937587079e-07, |
|
"loss": 0.5066, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.792744866008315, |
|
"grad_norm": 2.808084011077881, |
|
"learning_rate": 1.0635451607789469e-07, |
|
"loss": 0.5172, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.7937920851179693, |
|
"grad_norm": 2.172506332397461, |
|
"learning_rate": 1.0532178907030275e-07, |
|
"loss": 0.4797, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.7948393042276235, |
|
"grad_norm": 1.9276924133300781, |
|
"learning_rate": 1.0429350999168119e-07, |
|
"loss": 0.5057, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.7958865233372778, |
|
"grad_norm": 2.1610867977142334, |
|
"learning_rate": 1.0326969043048955e-07, |
|
"loss": 0.4964, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.7969337424469322, |
|
"grad_norm": 2.5907599925994873, |
|
"learning_rate": 1.0225034192492876e-07, |
|
"loss": 0.4886, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.7979809615565865, |
|
"grad_norm": 1.8623499870300293, |
|
"learning_rate": 1.0123547596281257e-07, |
|
"loss": 0.5151, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.7990281806662408, |
|
"grad_norm": 1.7319766283035278, |
|
"learning_rate": 1.0022510398143785e-07, |
|
"loss": 0.4983, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.8000753997758951, |
|
"grad_norm": 3.9193685054779053, |
|
"learning_rate": 9.921923736745452e-08, |
|
"loss": 0.5011, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.8011226188855495, |
|
"grad_norm": 1.8976281881332397, |
|
"learning_rate": 9.821788745673864e-08, |
|
"loss": 0.5036, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.8021698379952037, |
|
"grad_norm": 2.426635980606079, |
|
"learning_rate": 9.722106553426446e-08, |
|
"loss": 0.4993, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.803217057104858, |
|
"grad_norm": 1.929158329963684, |
|
"learning_rate": 9.622878283397596e-08, |
|
"loss": 0.515, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.8042642762145124, |
|
"grad_norm": 3.309342622756958, |
|
"learning_rate": 9.524105053866182e-08, |
|
"loss": 0.5395, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.8053114953241667, |
|
"grad_norm": 1.8991940021514893, |
|
"learning_rate": 9.425787977982869e-08, |
|
"loss": 0.5079, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.806358714433821, |
|
"grad_norm": 2.271533250808716, |
|
"learning_rate": 9.32792816375756e-08, |
|
"loss": 0.4579, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.8074059335434753, |
|
"grad_norm": 2.1554083824157715, |
|
"learning_rate": 9.230526714046944e-08, |
|
"loss": 0.4556, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.8084531526531297, |
|
"grad_norm": 1.8269262313842773, |
|
"learning_rate": 9.133584726542037e-08, |
|
"loss": 0.4883, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.8095003717627839, |
|
"grad_norm": 2.5304064750671387, |
|
"learning_rate": 9.037103293755849e-08, |
|
"loss": 0.4977, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.8105475908724382, |
|
"grad_norm": 2.8901185989379883, |
|
"learning_rate": 8.941083503011021e-08, |
|
"loss": 0.5063, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.8115948099820925, |
|
"grad_norm": 2.2524912357330322, |
|
"learning_rate": 8.845526436427625e-08, |
|
"loss": 0.5144, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.8126420290917469, |
|
"grad_norm": 2.046915292739868, |
|
"learning_rate": 8.750433170910915e-08, |
|
"loss": 0.4933, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.8136892482014012, |
|
"grad_norm": 2.644960641860962, |
|
"learning_rate": 8.655804778139247e-08, |
|
"loss": 0.4962, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.8147364673110555, |
|
"grad_norm": 2.299511432647705, |
|
"learning_rate": 8.561642324551954e-08, |
|
"loss": 0.4546, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.8157836864207098, |
|
"grad_norm": 2.5044310092926025, |
|
"learning_rate": 8.467946871337344e-08, |
|
"loss": 0.4768, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.8168309055303641, |
|
"grad_norm": 1.8609235286712646, |
|
"learning_rate": 8.374719474420749e-08, |
|
"loss": 0.4724, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.8178781246400184, |
|
"grad_norm": 1.9416966438293457, |
|
"learning_rate": 8.281961184452629e-08, |
|
"loss": 0.4956, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.8189253437496727, |
|
"grad_norm": 2.851625919342041, |
|
"learning_rate": 8.189673046796702e-08, |
|
"loss": 0.5068, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.8199725628593271, |
|
"grad_norm": 2.262005567550659, |
|
"learning_rate": 8.097856101518186e-08, |
|
"loss": 0.4846, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.8210197819689814, |
|
"grad_norm": 2.1528186798095703, |
|
"learning_rate": 8.00651138337209e-08, |
|
"loss": 0.4776, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.8220670010786357, |
|
"grad_norm": 2.505295991897583, |
|
"learning_rate": 7.915639921791511e-08, |
|
"loss": 0.5012, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.82311422018829, |
|
"grad_norm": 2.5964581966400146, |
|
"learning_rate": 7.825242740876081e-08, |
|
"loss": 0.5111, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.8241614392979443, |
|
"grad_norm": 2.3113765716552734, |
|
"learning_rate": 7.735320859380384e-08, |
|
"loss": 0.5262, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.8252086584075986, |
|
"grad_norm": 1.8016088008880615, |
|
"learning_rate": 7.645875290702519e-08, |
|
"loss": 0.4794, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.8262558775172529, |
|
"grad_norm": 2.7183265686035156, |
|
"learning_rate": 7.556907042872601e-08, |
|
"loss": 0.5013, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.8273030966269073, |
|
"grad_norm": 1.6194109916687012, |
|
"learning_rate": 7.46841711854152e-08, |
|
"loss": 0.4662, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.8283503157365616, |
|
"grad_norm": 1.8583705425262451, |
|
"learning_rate": 7.38040651496955e-08, |
|
"loss": 0.4602, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.8293975348462159, |
|
"grad_norm": 2.0989129543304443, |
|
"learning_rate": 7.292876224015082e-08, |
|
"loss": 0.4922, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.8304447539558701, |
|
"grad_norm": 2.0418784618377686, |
|
"learning_rate": 7.205827232123585e-08, |
|
"loss": 0.5032, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.8314919730655245, |
|
"grad_norm": 2.34555983543396, |
|
"learning_rate": 7.119260520316368e-08, |
|
"loss": 0.4912, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.8325391921751788, |
|
"grad_norm": 2.5016937255859375, |
|
"learning_rate": 7.033177064179507e-08, |
|
"loss": 0.4792, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.8335864112848331, |
|
"grad_norm": 2.4543182849884033, |
|
"learning_rate": 6.947577833852991e-08, |
|
"loss": 0.4713, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.8346336303944875, |
|
"grad_norm": 2.092000961303711, |
|
"learning_rate": 6.862463794019657e-08, |
|
"loss": 0.4607, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.8356808495041418, |
|
"grad_norm": 2.430490255355835, |
|
"learning_rate": 6.777835903894324e-08, |
|
"loss": 0.5018, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.8367280686137961, |
|
"grad_norm": 1.815276026725769, |
|
"learning_rate": 6.69369511721311e-08, |
|
"loss": 0.4967, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.8377752877234503, |
|
"grad_norm": 2.1097006797790527, |
|
"learning_rate": 6.610042382222497e-08, |
|
"loss": 0.4601, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.8388225068331047, |
|
"grad_norm": 3.367506504058838, |
|
"learning_rate": 6.526878641668798e-08, |
|
"loss": 0.4913, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.839869725942759, |
|
"grad_norm": 1.4861557483673096, |
|
"learning_rate": 6.444204832787486e-08, |
|
"loss": 0.485, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.8409169450524133, |
|
"grad_norm": 2.3718228340148926, |
|
"learning_rate": 6.362021887292578e-08, |
|
"loss": 0.4941, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.8419641641620677, |
|
"grad_norm": 2.2200145721435547, |
|
"learning_rate": 6.28033073136619e-08, |
|
"loss": 0.4928, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.843011383271722, |
|
"grad_norm": 2.4420855045318604, |
|
"learning_rate": 6.199132285648129e-08, |
|
"loss": 0.515, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.8440586023813763, |
|
"grad_norm": 2.225245714187622, |
|
"learning_rate": 6.118427465225418e-08, |
|
"loss": 0.5029, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.8451058214910305, |
|
"grad_norm": 2.7253527641296387, |
|
"learning_rate": 6.038217179622057e-08, |
|
"loss": 0.4898, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.8461530406006849, |
|
"grad_norm": 1.8062297105789185, |
|
"learning_rate": 5.958502332788806e-08, |
|
"loss": 0.5089, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.8472002597103392, |
|
"grad_norm": 3.0290756225585938, |
|
"learning_rate": 5.8792838230928734e-08, |
|
"loss": 0.4988, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.8482474788199935, |
|
"grad_norm": 2.042731523513794, |
|
"learning_rate": 5.800562543307913e-08, |
|
"loss": 0.493, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.8492946979296478, |
|
"grad_norm": 2.5578713417053223, |
|
"learning_rate": 5.722339380603908e-08, |
|
"loss": 0.475, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.8503419170393022, |
|
"grad_norm": 3.2866199016571045, |
|
"learning_rate": 5.6446152165371685e-08, |
|
"loss": 0.5102, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.8513891361489564, |
|
"grad_norm": 2.475862979888916, |
|
"learning_rate": 5.5673909270404495e-08, |
|
"loss": 0.4896, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.8524363552586107, |
|
"grad_norm": 4.128602027893066, |
|
"learning_rate": 5.490667382412978e-08, |
|
"loss": 0.4781, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.853483574368265, |
|
"grad_norm": 2.8154897689819336, |
|
"learning_rate": 5.414445447310745e-08, |
|
"loss": 0.5034, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.8545307934779194, |
|
"grad_norm": 2.5624399185180664, |
|
"learning_rate": 5.338725980736736e-08, |
|
"loss": 0.4997, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.8555780125875737, |
|
"grad_norm": 2.6771199703216553, |
|
"learning_rate": 5.263509836031193e-08, |
|
"loss": 0.5214, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.856625231697228, |
|
"grad_norm": 2.225013494491577, |
|
"learning_rate": 5.1887978608620596e-08, |
|
"loss": 0.4838, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.8576724508068824, |
|
"grad_norm": 2.8142294883728027, |
|
"learning_rate": 5.114590897215448e-08, |
|
"loss": 0.5037, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.8587196699165366, |
|
"grad_norm": 2.071779727935791, |
|
"learning_rate": 5.040889781386043e-08, |
|
"loss": 0.4689, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.8597668890261909, |
|
"grad_norm": 2.6963651180267334, |
|
"learning_rate": 4.9676953439677925e-08, |
|
"loss": 0.489, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.8608141081358452, |
|
"grad_norm": 2.4148457050323486, |
|
"learning_rate": 4.895008409844481e-08, |
|
"loss": 0.4816, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.8618613272454996, |
|
"grad_norm": 2.611649513244629, |
|
"learning_rate": 4.822829798180467e-08, |
|
"loss": 0.5531, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.8629085463551539, |
|
"grad_norm": 1.8031556606292725, |
|
"learning_rate": 4.751160322411418e-08, |
|
"loss": 0.454, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.8639557654648082, |
|
"grad_norm": 2.0377116203308105, |
|
"learning_rate": 4.680000790235178e-08, |
|
"loss": 0.5212, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.8650029845744626, |
|
"grad_norm": 1.7090651988983154, |
|
"learning_rate": 4.609352003602646e-08, |
|
"loss": 0.4721, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.8660502036841168, |
|
"grad_norm": 0.9355291724205017, |
|
"learning_rate": 4.5392147587087315e-08, |
|
"loss": 0.4535, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.8670974227937711, |
|
"grad_norm": 2.991403579711914, |
|
"learning_rate": 4.4695898459834016e-08, |
|
"loss": 0.5108, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.8681446419034254, |
|
"grad_norm": 2.0942938327789307, |
|
"learning_rate": 4.400478050082751e-08, |
|
"loss": 0.4919, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.8691918610130798, |
|
"grad_norm": 1.971248745918274, |
|
"learning_rate": 4.331880149880179e-08, |
|
"loss": 0.4981, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.8702390801227341, |
|
"grad_norm": 2.0472984313964844, |
|
"learning_rate": 4.263796918457613e-08, |
|
"loss": 0.4663, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.8712862992323884, |
|
"grad_norm": 2.9207637310028076, |
|
"learning_rate": 4.196229123096762e-08, |
|
"loss": 0.4723, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.8723335183420428, |
|
"grad_norm": 2.6545724868774414, |
|
"learning_rate": 4.129177525270511e-08, |
|
"loss": 0.5042, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.873380737451697, |
|
"grad_norm": 2.008007526397705, |
|
"learning_rate": 4.0626428806343205e-08, |
|
"loss": 0.4904, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.8744279565613513, |
|
"grad_norm": 1.2464555501937866, |
|
"learning_rate": 3.996625939017711e-08, |
|
"loss": 0.5248, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.8754751756710056, |
|
"grad_norm": 3.1436216831207275, |
|
"learning_rate": 3.9311274444158106e-08, |
|
"loss": 0.4924, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.87652239478066, |
|
"grad_norm": 3.0234928131103516, |
|
"learning_rate": 3.8661481349809786e-08, |
|
"loss": 0.493, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.8775696138903143, |
|
"grad_norm": 2.1175239086151123, |
|
"learning_rate": 3.8016887430144754e-08, |
|
"loss": 0.4933, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.8786168329999686, |
|
"grad_norm": 2.497673749923706, |
|
"learning_rate": 3.737749994958228e-08, |
|
"loss": 0.5146, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.879664052109623, |
|
"grad_norm": 1.5378285646438599, |
|
"learning_rate": 3.674332611386616e-08, |
|
"loss": 0.4628, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.8807112712192772, |
|
"grad_norm": 3.481321334838867, |
|
"learning_rate": 3.6114373069983885e-08, |
|
"loss": 0.513, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.8817584903289315, |
|
"grad_norm": 3.8998842239379883, |
|
"learning_rate": 3.549064790608536e-08, |
|
"loss": 0.5157, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.8828057094385858, |
|
"grad_norm": 4.254595756530762, |
|
"learning_rate": 3.487215765140422e-08, |
|
"loss": 0.503, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.8838529285482402, |
|
"grad_norm": 1.633023977279663, |
|
"learning_rate": 3.4258909276177584e-08, |
|
"loss": 0.4763, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.8849001476578945, |
|
"grad_norm": 2.1271402835845947, |
|
"learning_rate": 3.365090969156764e-08, |
|
"loss": 0.514, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.8859473667675488, |
|
"grad_norm": 2.325639009475708, |
|
"learning_rate": 3.304816574958441e-08, |
|
"loss": 0.5295, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.886994585877203, |
|
"grad_norm": 3.336534261703491, |
|
"learning_rate": 3.2450684243007786e-08, |
|
"loss": 0.498, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.8880418049868574, |
|
"grad_norm": 2.818937301635742, |
|
"learning_rate": 3.185847190531121e-08, |
|
"loss": 0.4621, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.8890890240965117, |
|
"grad_norm": 2.3609235286712646, |
|
"learning_rate": 3.1271535410586136e-08, |
|
"loss": 0.4536, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.890136243206166, |
|
"grad_norm": 2.134856939315796, |
|
"learning_rate": 3.06898813734664e-08, |
|
"loss": 0.4955, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.8911834623158204, |
|
"grad_norm": 2.349867105484009, |
|
"learning_rate": 3.011351634905357e-08, |
|
"loss": 0.5, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.8922306814254747, |
|
"grad_norm": 2.3223259449005127, |
|
"learning_rate": 2.9542446832843793e-08, |
|
"loss": 0.5176, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.893277900535129, |
|
"grad_norm": 2.8934836387634277, |
|
"learning_rate": 2.8976679260653613e-08, |
|
"loss": 0.5069, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.8943251196447832, |
|
"grad_norm": 2.5627784729003906, |
|
"learning_rate": 2.8416220008548152e-08, |
|
"loss": 0.5019, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.8953723387544376, |
|
"grad_norm": 4.0183796882629395, |
|
"learning_rate": 2.7861075392769275e-08, |
|
"loss": 0.4907, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.8964195578640919, |
|
"grad_norm": 2.2696878910064697, |
|
"learning_rate": 2.7311251669663692e-08, |
|
"loss": 0.4785, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.8974667769737462, |
|
"grad_norm": 2.5743296146392822, |
|
"learning_rate": 2.6766755035613155e-08, |
|
"loss": 0.4707, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.8985139960834005, |
|
"grad_norm": 2.059088945388794, |
|
"learning_rate": 2.622759162696464e-08, |
|
"loss": 0.5246, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.8995612151930549, |
|
"grad_norm": 1.2305697202682495, |
|
"learning_rate": 2.5693767519960496e-08, |
|
"loss": 0.4841, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.9006084343027092, |
|
"grad_norm": 3.181995153427124, |
|
"learning_rate": 2.5165288730670585e-08, |
|
"loss": 0.4882, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.9016556534123634, |
|
"grad_norm": 2.311540365219116, |
|
"learning_rate": 2.464216121492463e-08, |
|
"loss": 0.4918, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.9027028725220178, |
|
"grad_norm": 1.5216143131256104, |
|
"learning_rate": 2.412439086824436e-08, |
|
"loss": 0.4877, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.9037500916316721, |
|
"grad_norm": 1.816412091255188, |
|
"learning_rate": 2.361198352577759e-08, |
|
"loss": 0.495, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.9047973107413264, |
|
"grad_norm": 1.8467931747436523, |
|
"learning_rate": 2.310494496223253e-08, |
|
"loss": 0.517, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.9058445298509807, |
|
"grad_norm": 1.95524001121521, |
|
"learning_rate": 2.260328089181246e-08, |
|
"loss": 0.4702, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.9068917489606351, |
|
"grad_norm": 2.4727303981781006, |
|
"learning_rate": 2.210699696815127e-08, |
|
"loss": 0.498, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.9079389680702894, |
|
"grad_norm": 3.1941773891448975, |
|
"learning_rate": 2.1616098784250082e-08, |
|
"loss": 0.4655, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.9089861871799436, |
|
"grad_norm": 3.8430733680725098, |
|
"learning_rate": 2.1130591872413837e-08, |
|
"loss": 0.5178, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.910033406289598, |
|
"grad_norm": 1.787541151046753, |
|
"learning_rate": 2.0650481704189315e-08, |
|
"loss": 0.4858, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.9110806253992523, |
|
"grad_norm": 1.8147176504135132, |
|
"learning_rate": 2.017577369030321e-08, |
|
"loss": 0.4997, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.9121278445089066, |
|
"grad_norm": 2.207904100418091, |
|
"learning_rate": 1.9706473180601145e-08, |
|
"loss": 0.4998, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.9131750636185609, |
|
"grad_norm": 2.220478057861328, |
|
"learning_rate": 1.9242585463987548e-08, |
|
"loss": 0.4939, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.9142222827282153, |
|
"grad_norm": 2.459459066390991, |
|
"learning_rate": 1.878411576836597e-08, |
|
"loss": 0.5106, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.9152695018378696, |
|
"grad_norm": 1.8161354064941406, |
|
"learning_rate": 1.8331069260580147e-08, |
|
"loss": 0.4519, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.9163167209475238, |
|
"grad_norm": 2.2104363441467285, |
|
"learning_rate": 1.78834510463558e-08, |
|
"loss": 0.4841, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.9173639400571781, |
|
"grad_norm": 3.3614344596862793, |
|
"learning_rate": 1.744126617024305e-08, |
|
"loss": 0.4699, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.9184111591668325, |
|
"grad_norm": 1.9489402770996094, |
|
"learning_rate": 1.70045196155596e-08, |
|
"loss": 0.4884, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.9194583782764868, |
|
"grad_norm": 2.2660348415374756, |
|
"learning_rate": 1.6573216304334615e-08, |
|
"loss": 0.4971, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.9205055973861411, |
|
"grad_norm": 1.9117883443832397, |
|
"learning_rate": 1.6147361097253122e-08, |
|
"loss": 0.5133, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.9215528164957955, |
|
"grad_norm": 2.3087127208709717, |
|
"learning_rate": 1.5726958793601476e-08, |
|
"loss": 0.481, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.9226000356054497, |
|
"grad_norm": 2.1353018283843994, |
|
"learning_rate": 1.5312014131212914e-08, |
|
"loss": 0.4618, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.923647254715104, |
|
"grad_norm": 2.694920778274536, |
|
"learning_rate": 1.4902531786414542e-08, |
|
"loss": 0.4633, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.9246944738247583, |
|
"grad_norm": 2.070590019226074, |
|
"learning_rate": 1.4498516373974312e-08, |
|
"loss": 0.5069, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.9257416929344127, |
|
"grad_norm": 1.7129287719726562, |
|
"learning_rate": 1.4099972447049246e-08, |
|
"loss": 0.479, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.926788912044067, |
|
"grad_norm": 2.0258448123931885, |
|
"learning_rate": 1.3706904497133964e-08, |
|
"loss": 0.5026, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.9278361311537213, |
|
"grad_norm": 2.2771730422973633, |
|
"learning_rate": 1.331931695401034e-08, |
|
"loss": 0.4739, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.9288833502633757, |
|
"grad_norm": 2.1517481803894043, |
|
"learning_rate": 1.2937214185696988e-08, |
|
"loss": 0.5027, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.9299305693730299, |
|
"grad_norm": 2.0524544715881348, |
|
"learning_rate": 1.2560600498400852e-08, |
|
"loss": 0.459, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.9309777884826842, |
|
"grad_norm": 2.0591094493865967, |
|
"learning_rate": 1.2189480136467978e-08, |
|
"loss": 0.512, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.9320250075923385, |
|
"grad_norm": 1.7868990898132324, |
|
"learning_rate": 1.1823857282335869e-08, |
|
"loss": 0.4755, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.9330722267019929, |
|
"grad_norm": 2.4516055583953857, |
|
"learning_rate": 1.146373605648676e-08, |
|
"loss": 0.5004, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.9341194458116472, |
|
"grad_norm": 2.602165699005127, |
|
"learning_rate": 1.1109120517400704e-08, |
|
"loss": 0.5163, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.9351666649213015, |
|
"grad_norm": 4.763970851898193, |
|
"learning_rate": 1.076001466150972e-08, |
|
"loss": 0.5095, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.9362138840309558, |
|
"grad_norm": 2.463984966278076, |
|
"learning_rate": 1.0416422423153547e-08, |
|
"loss": 0.5034, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.9372611031406101, |
|
"grad_norm": 2.4041192531585693, |
|
"learning_rate": 1.0078347674534194e-08, |
|
"loss": 0.4741, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.9383083222502644, |
|
"grad_norm": 3.2481226921081543, |
|
"learning_rate": 9.745794225673288e-09, |
|
"loss": 0.5558, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.9393555413599187, |
|
"grad_norm": 2.0538644790649414, |
|
"learning_rate": 9.418765824368625e-09, |
|
"loss": 0.5126, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.940402760469573, |
|
"grad_norm": 3.1280417442321777, |
|
"learning_rate": 9.097266156151972e-09, |
|
"loss": 0.4813, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.9414499795792274, |
|
"grad_norm": 2.6181859970092773, |
|
"learning_rate": 8.781298844247608e-09, |
|
"loss": 0.4985, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.9424971986888817, |
|
"grad_norm": 2.8424460887908936, |
|
"learning_rate": 8.470867449531627e-09, |
|
"loss": 0.5032, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.943544417798536, |
|
"grad_norm": 1.8021912574768066, |
|
"learning_rate": 8.165975470491416e-09, |
|
"loss": 0.5082, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.9445916369081903, |
|
"grad_norm": 2.1348044872283936, |
|
"learning_rate": 7.866626343186577e-09, |
|
"loss": 0.4811, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.9456388560178446, |
|
"grad_norm": 1.665382981300354, |
|
"learning_rate": 7.572823441210353e-09, |
|
"loss": 0.5137, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.9466860751274989, |
|
"grad_norm": 1.782528281211853, |
|
"learning_rate": 7.284570075650864e-09, |
|
"loss": 0.4861, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.9477332942371532, |
|
"grad_norm": 2.0802054405212402, |
|
"learning_rate": 7.001869495054713e-09, |
|
"loss": 0.5201, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.9487805133468076, |
|
"grad_norm": 2.515943765640259, |
|
"learning_rate": 6.724724885389721e-09, |
|
"loss": 0.4863, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.9498277324564619, |
|
"grad_norm": 1.7922004461288452, |
|
"learning_rate": 6.4531393700092415e-09, |
|
"loss": 0.4858, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.9508749515661162, |
|
"grad_norm": 1.5402792692184448, |
|
"learning_rate": 6.187116009617188e-09, |
|
"loss": 0.5174, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.9519221706757705, |
|
"grad_norm": 2.370882987976074, |
|
"learning_rate": 5.926657802233004e-09, |
|
"loss": 0.5299, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.9529693897854248, |
|
"grad_norm": 2.1812610626220703, |
|
"learning_rate": 5.671767683158357e-09, |
|
"loss": 0.5078, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.9540166088950791, |
|
"grad_norm": 1.9076416492462158, |
|
"learning_rate": 5.422448524944057e-09, |
|
"loss": 0.4871, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.9550638280047334, |
|
"grad_norm": 2.5718798637390137, |
|
"learning_rate": 5.1787031373571326e-09, |
|
"loss": 0.5, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.9561110471143878, |
|
"grad_norm": 1.7200427055358887, |
|
"learning_rate": 4.940534267349861e-09, |
|
"loss": 0.4824, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.9571582662240421, |
|
"grad_norm": 2.0528995990753174, |
|
"learning_rate": 4.7079445990284015e-09, |
|
"loss": 0.4893, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.9582054853336963, |
|
"grad_norm": 2.170036554336548, |
|
"learning_rate": 4.4809367536226e-09, |
|
"loss": 0.5468, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.9592527044433506, |
|
"grad_norm": 2.4191830158233643, |
|
"learning_rate": 4.2595132894565625e-09, |
|
"loss": 0.496, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.960299923553005, |
|
"grad_norm": 3.8748281002044678, |
|
"learning_rate": 4.043676701919741e-09, |
|
"loss": 0.52, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.9613471426626593, |
|
"grad_norm": 2.9865217208862305, |
|
"learning_rate": 3.833429423438838e-09, |
|
"loss": 0.4729, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.9623943617723136, |
|
"grad_norm": 3.5876505374908447, |
|
"learning_rate": 3.628773823450337e-09, |
|
"loss": 0.4557, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.963441580881968, |
|
"grad_norm": 2.007694959640503, |
|
"learning_rate": 3.429712208373847e-09, |
|
"loss": 0.5197, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.9644887999916223, |
|
"grad_norm": 1.564520239830017, |
|
"learning_rate": 3.2362468215861306e-09, |
|
"loss": 0.4519, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.9655360191012765, |
|
"grad_norm": 2.6633753776550293, |
|
"learning_rate": 3.0483798433957876e-09, |
|
"loss": 0.5247, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.9665832382109308, |
|
"grad_norm": 2.7909083366394043, |
|
"learning_rate": 2.8661133910187206e-09, |
|
"loss": 0.4981, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.9676304573205852, |
|
"grad_norm": 2.7965500354766846, |
|
"learning_rate": 2.68944951855421e-09, |
|
"loss": 0.4982, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.9686776764302395, |
|
"grad_norm": 2.164356231689453, |
|
"learning_rate": 2.5183902169618187e-09, |
|
"loss": 0.4926, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.9697248955398938, |
|
"grad_norm": 2.378080368041992, |
|
"learning_rate": 2.352937414038969e-09, |
|
"loss": 0.4796, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.9707721146495482, |
|
"grad_norm": 2.3100953102111816, |
|
"learning_rate": 2.1930929743990136e-09, |
|
"loss": 0.511, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.9718193337592025, |
|
"grad_norm": 1.154026985168457, |
|
"learning_rate": 2.0388586994506964e-09, |
|
"loss": 0.5297, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.9728665528688567, |
|
"grad_norm": 2.432117462158203, |
|
"learning_rate": 1.8902363273772815e-09, |
|
"loss": 0.4869, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.973913771978511, |
|
"grad_norm": 2.1382997035980225, |
|
"learning_rate": 1.7472275331173459e-09, |
|
"loss": 0.5253, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.9749609910881654, |
|
"grad_norm": 2.517921209335327, |
|
"learning_rate": 1.609833928345794e-09, |
|
"loss": 0.4989, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.9760082101978197, |
|
"grad_norm": 2.1486592292785645, |
|
"learning_rate": 1.4780570614556508e-09, |
|
"loss": 0.5392, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.977055429307474, |
|
"grad_norm": 2.8666563034057617, |
|
"learning_rate": 1.3518984175406312e-09, |
|
"loss": 0.4899, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.9781026484171284, |
|
"grad_norm": 2.0608692169189453, |
|
"learning_rate": 1.231359418378486e-09, |
|
"loss": 0.5013, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.9791498675267827, |
|
"grad_norm": 2.5256223678588867, |
|
"learning_rate": 1.1164414224149598e-09, |
|
"loss": 0.506, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.9801970866364369, |
|
"grad_norm": 1.9714406728744507, |
|
"learning_rate": 1.0071457247482485e-09, |
|
"loss": 0.5306, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.9812443057460912, |
|
"grad_norm": 2.5823991298675537, |
|
"learning_rate": 9.034735571147312e-10, |
|
"loss": 0.4887, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.9822915248557456, |
|
"grad_norm": 2.48111891746521, |
|
"learning_rate": 8.054260878749275e-10, |
|
"loss": 0.5309, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.9833387439653999, |
|
"grad_norm": 3.824676752090454, |
|
"learning_rate": 7.130044220003962e-10, |
|
"loss": 0.4919, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.9843859630750542, |
|
"grad_norm": 2.073537588119507, |
|
"learning_rate": 6.26209601061134e-10, |
|
"loss": 0.4679, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.9854331821847085, |
|
"grad_norm": 2.32852840423584, |
|
"learning_rate": 5.450426032140298e-10, |
|
"loss": 0.4893, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.9864804012943629, |
|
"grad_norm": 3.0331838130950928, |
|
"learning_rate": 4.695043431917068e-10, |
|
"loss": 0.4837, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.9875276204040171, |
|
"grad_norm": 2.3463919162750244, |
|
"learning_rate": 3.995956722922522e-10, |
|
"loss": 0.4748, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.9885748395136714, |
|
"grad_norm": 3.0472140312194824, |
|
"learning_rate": 3.3531737836967054e-10, |
|
"loss": 0.5212, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.9896220586233258, |
|
"grad_norm": 1.4455373287200928, |
|
"learning_rate": 2.766701858250009e-10, |
|
"loss": 0.4858, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.9906692777329801, |
|
"grad_norm": 2.5533838272094727, |
|
"learning_rate": 2.2365475559799064e-10, |
|
"loss": 0.5016, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.9917164968426344, |
|
"grad_norm": 2.4406557083129883, |
|
"learning_rate": 1.762716851599344e-10, |
|
"loss": 0.4551, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.9927637159522887, |
|
"grad_norm": 2.5848546028137207, |
|
"learning_rate": 1.3452150850656872e-10, |
|
"loss": 0.4797, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.993810935061943, |
|
"grad_norm": 2.0372912883758545, |
|
"learning_rate": 9.84046961525209e-11, |
|
"loss": 0.4646, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.9948581541715973, |
|
"grad_norm": 2.8523876667022705, |
|
"learning_rate": 6.792165512553571e-11, |
|
"loss": 0.4876, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.9959053732812516, |
|
"grad_norm": 2.202986001968384, |
|
"learning_rate": 4.3072728962256774e-11, |
|
"loss": 0.5156, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.996952592390906, |
|
"grad_norm": 2.1548354625701904, |
|
"learning_rate": 2.3858197704063055e-11, |
|
"loss": 0.5241, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.9979998115005603, |
|
"grad_norm": 1.8615128993988037, |
|
"learning_rate": 1.0278277894182342e-11, |
|
"loss": 0.4658, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.9990470306102146, |
|
"grad_norm": 2.989764928817749, |
|
"learning_rate": 2.3331225750267137e-12, |
|
"loss": 0.5486, |
|
"step": 95400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 95491, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.299285301826683e+17, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|