|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3089, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032372936225315637, |
|
"grad_norm": 5.914970298041148, |
|
"learning_rate": 3.2362459546925565e-07, |
|
"loss": 0.8345, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006474587245063127, |
|
"grad_norm": 3.7232256971173268, |
|
"learning_rate": 6.472491909385113e-07, |
|
"loss": 0.7733, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009711880867594691, |
|
"grad_norm": 2.7946325934816634, |
|
"learning_rate": 9.70873786407767e-07, |
|
"loss": 0.6634, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012949174490126255, |
|
"grad_norm": 3.6676755875340263, |
|
"learning_rate": 1.2944983818770226e-06, |
|
"loss": 0.6075, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01618646811265782, |
|
"grad_norm": 2.4485205913142916, |
|
"learning_rate": 1.6181229773462783e-06, |
|
"loss": 0.5663, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.019423761735189383, |
|
"grad_norm": 2.4316459196101996, |
|
"learning_rate": 1.941747572815534e-06, |
|
"loss": 0.5943, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.022661055357720946, |
|
"grad_norm": 2.3044316553901125, |
|
"learning_rate": 2.26537216828479e-06, |
|
"loss": 0.5652, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02589834898025251, |
|
"grad_norm": 2.9385172928902823, |
|
"learning_rate": 2.588996763754045e-06, |
|
"loss": 0.5634, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.029135642602784072, |
|
"grad_norm": 2.221999891073394, |
|
"learning_rate": 2.912621359223301e-06, |
|
"loss": 0.5387, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03237293622531564, |
|
"grad_norm": 2.186070765481556, |
|
"learning_rate": 3.2362459546925567e-06, |
|
"loss": 0.5459, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0356102298478472, |
|
"grad_norm": 1.9674728126053138, |
|
"learning_rate": 3.5598705501618126e-06, |
|
"loss": 0.5463, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.038847523470378766, |
|
"grad_norm": 2.1315512928027034, |
|
"learning_rate": 3.883495145631068e-06, |
|
"loss": 0.544, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04208481709291033, |
|
"grad_norm": 2.135135712788282, |
|
"learning_rate": 4.207119741100324e-06, |
|
"loss": 0.5484, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04532211071544189, |
|
"grad_norm": 2.184082631640455, |
|
"learning_rate": 4.53074433656958e-06, |
|
"loss": 0.5123, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.048559404337973455, |
|
"grad_norm": 2.157079343663751, |
|
"learning_rate": 4.854368932038836e-06, |
|
"loss": 0.5421, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05179669796050502, |
|
"grad_norm": 2.171293112107742, |
|
"learning_rate": 5.17799352750809e-06, |
|
"loss": 0.5589, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05503399158303658, |
|
"grad_norm": 1.9239122091594658, |
|
"learning_rate": 5.501618122977347e-06, |
|
"loss": 0.5452, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.058271285205568145, |
|
"grad_norm": 1.9375062757821118, |
|
"learning_rate": 5.825242718446602e-06, |
|
"loss": 0.5272, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06150857882809971, |
|
"grad_norm": 2.3849782638039154, |
|
"learning_rate": 6.148867313915859e-06, |
|
"loss": 0.5132, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06474587245063128, |
|
"grad_norm": 1.8674953957006846, |
|
"learning_rate": 6.472491909385113e-06, |
|
"loss": 0.5372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06798316607316283, |
|
"grad_norm": 1.9339311401708401, |
|
"learning_rate": 6.79611650485437e-06, |
|
"loss": 0.546, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0712204596956944, |
|
"grad_norm": 2.1072958412701044, |
|
"learning_rate": 7.119741100323625e-06, |
|
"loss": 0.5242, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07445775331822596, |
|
"grad_norm": 2.377518773852171, |
|
"learning_rate": 7.443365695792882e-06, |
|
"loss": 0.5293, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07769504694075753, |
|
"grad_norm": 2.0442232616421134, |
|
"learning_rate": 7.766990291262136e-06, |
|
"loss": 0.5299, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08093234056328909, |
|
"grad_norm": 2.0780538675854663, |
|
"learning_rate": 8.090614886731393e-06, |
|
"loss": 0.5356, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08416963418582066, |
|
"grad_norm": 3.8664800439938944, |
|
"learning_rate": 8.414239482200647e-06, |
|
"loss": 0.5208, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08740692780835221, |
|
"grad_norm": 1.8534756204932774, |
|
"learning_rate": 8.737864077669904e-06, |
|
"loss": 0.5576, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09064422143088378, |
|
"grad_norm": 3.0411642696089425, |
|
"learning_rate": 9.06148867313916e-06, |
|
"loss": 0.5138, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09388151505341534, |
|
"grad_norm": 2.4331984099340755, |
|
"learning_rate": 9.385113268608415e-06, |
|
"loss": 0.5218, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09711880867594691, |
|
"grad_norm": 1.753588947234162, |
|
"learning_rate": 9.708737864077671e-06, |
|
"loss": 0.5575, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.10035610229847847, |
|
"grad_norm": 2.192062741982724, |
|
"learning_rate": 9.999996807358784e-06, |
|
"loss": 0.5312, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10359339592101004, |
|
"grad_norm": 1.5161263334784383, |
|
"learning_rate": 9.999613695346183e-06, |
|
"loss": 0.5467, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1068306895435416, |
|
"grad_norm": 1.5940034702172312, |
|
"learning_rate": 9.998592111150392e-06, |
|
"loss": 0.5373, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11006798316607316, |
|
"grad_norm": 1.506000200507162, |
|
"learning_rate": 9.996932185232106e-06, |
|
"loss": 0.5314, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11330527678860472, |
|
"grad_norm": 1.7139944002155716, |
|
"learning_rate": 9.994634129571013e-06, |
|
"loss": 0.5373, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11654257041113629, |
|
"grad_norm": 1.944009244747707, |
|
"learning_rate": 9.991698237638708e-06, |
|
"loss": 0.5157, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11977986403366786, |
|
"grad_norm": 1.3403254941353564, |
|
"learning_rate": 9.988124884361222e-06, |
|
"loss": 0.5472, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12301715765619942, |
|
"grad_norm": 2.1963515894000794, |
|
"learning_rate": 9.983914526071148e-06, |
|
"loss": 0.542, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12625445127873097, |
|
"grad_norm": 2.751448769848235, |
|
"learning_rate": 9.979067700449358e-06, |
|
"loss": 0.5386, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12949174490126256, |
|
"grad_norm": 2.1722392044422545, |
|
"learning_rate": 9.973585026456338e-06, |
|
"loss": 0.5281, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1327290385237941, |
|
"grad_norm": 1.6770114606853526, |
|
"learning_rate": 9.967467204253153e-06, |
|
"loss": 0.5589, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13596633214632567, |
|
"grad_norm": 1.9183336831409301, |
|
"learning_rate": 9.960715015112022e-06, |
|
"loss": 0.5072, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13920362576885723, |
|
"grad_norm": 2.384394686356415, |
|
"learning_rate": 9.953329321316556e-06, |
|
"loss": 0.5373, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1424409193913888, |
|
"grad_norm": 2.537628989863915, |
|
"learning_rate": 9.945311066051632e-06, |
|
"loss": 0.5283, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14567821301392037, |
|
"grad_norm": 1.4128529976835362, |
|
"learning_rate": 9.936661273282957e-06, |
|
"loss": 0.5444, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14891550663645192, |
|
"grad_norm": 1.269855727764354, |
|
"learning_rate": 9.927381047626283e-06, |
|
"loss": 0.5115, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.15215280025898348, |
|
"grad_norm": 1.403995189704697, |
|
"learning_rate": 9.917471574206366e-06, |
|
"loss": 0.5058, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15539009388151506, |
|
"grad_norm": 1.984703757619364, |
|
"learning_rate": 9.9069341185056e-06, |
|
"loss": 0.5325, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15862738750404662, |
|
"grad_norm": 5.350351817604479, |
|
"learning_rate": 9.895770026202424e-06, |
|
"loss": 0.528, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16186468112657817, |
|
"grad_norm": 1.3441213050436032, |
|
"learning_rate": 9.883980722999467e-06, |
|
"loss": 0.5492, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16510197474910973, |
|
"grad_norm": 1.6583123951777, |
|
"learning_rate": 9.871567714441481e-06, |
|
"loss": 0.5166, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.16833926837164132, |
|
"grad_norm": 1.819057899971267, |
|
"learning_rate": 9.858532585723071e-06, |
|
"loss": 0.5382, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.17157656199417287, |
|
"grad_norm": 1.401898794325159, |
|
"learning_rate": 9.84487700148627e-06, |
|
"loss": 0.5266, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17481385561670443, |
|
"grad_norm": 1.363660786872396, |
|
"learning_rate": 9.830602705607946e-06, |
|
"loss": 0.5482, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.178051149239236, |
|
"grad_norm": 1.347346394618236, |
|
"learning_rate": 9.81571152097711e-06, |
|
"loss": 0.509, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18128844286176757, |
|
"grad_norm": 1.153935203111527, |
|
"learning_rate": 9.800205349262115e-06, |
|
"loss": 0.5229, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18452573648429912, |
|
"grad_norm": 1.3687757149985038, |
|
"learning_rate": 9.784086170667817e-06, |
|
"loss": 0.5266, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.18776303010683068, |
|
"grad_norm": 1.4692106944797128, |
|
"learning_rate": 9.767356043682687e-06, |
|
"loss": 0.5131, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19100032372936226, |
|
"grad_norm": 2.090443291629213, |
|
"learning_rate": 9.750017104815932e-06, |
|
"loss": 0.5237, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19423761735189382, |
|
"grad_norm": 1.6775830237228662, |
|
"learning_rate": 9.732071568324662e-06, |
|
"loss": 0.5137, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.19747491097442538, |
|
"grad_norm": 1.3780856622111588, |
|
"learning_rate": 9.713521725931107e-06, |
|
"loss": 0.5474, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.20071220459695693, |
|
"grad_norm": 1.2939695420785708, |
|
"learning_rate": 9.694369946529964e-06, |
|
"loss": 0.5355, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.20394949821948852, |
|
"grad_norm": 1.4450768355958643, |
|
"learning_rate": 9.674618675885878e-06, |
|
"loss": 0.5514, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.20718679184202007, |
|
"grad_norm": 1.327992017483562, |
|
"learning_rate": 9.654270436321103e-06, |
|
"loss": 0.5329, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.21042408546455163, |
|
"grad_norm": 1.7989475405276711, |
|
"learning_rate": 9.633327826393392e-06, |
|
"loss": 0.5157, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2136613790870832, |
|
"grad_norm": 2.05329586810244, |
|
"learning_rate": 9.611793520564155e-06, |
|
"loss": 0.5133, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21689867270961477, |
|
"grad_norm": 1.5423450609784548, |
|
"learning_rate": 9.589670268856913e-06, |
|
"loss": 0.542, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.22013596633214633, |
|
"grad_norm": 1.4428683673038962, |
|
"learning_rate": 9.566960896506108e-06, |
|
"loss": 0.4971, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22337325995467788, |
|
"grad_norm": 1.364527266808998, |
|
"learning_rate": 9.543668303596313e-06, |
|
"loss": 0.5173, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.22661055357720944, |
|
"grad_norm": 1.5271705470696573, |
|
"learning_rate": 9.519795464691873e-06, |
|
"loss": 0.517, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.22984784719974102, |
|
"grad_norm": 1.5089398058797954, |
|
"learning_rate": 9.495345428457048e-06, |
|
"loss": 0.5236, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23308514082227258, |
|
"grad_norm": 2.1076475628907057, |
|
"learning_rate": 9.470321317266679e-06, |
|
"loss": 0.5376, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.23632243444480414, |
|
"grad_norm": 2.2100737808822943, |
|
"learning_rate": 9.444726326807445e-06, |
|
"loss": 0.5191, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23955972806733572, |
|
"grad_norm": 1.5902060955759352, |
|
"learning_rate": 9.418563725669772e-06, |
|
"loss": 0.505, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24279702168986728, |
|
"grad_norm": 1.395823374280939, |
|
"learning_rate": 9.391836854930404e-06, |
|
"loss": 0.5249, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.24603431531239883, |
|
"grad_norm": 1.409554350409635, |
|
"learning_rate": 9.364549127725749e-06, |
|
"loss": 0.4998, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2492716089349304, |
|
"grad_norm": 1.2811036840491727, |
|
"learning_rate": 9.33670402881599e-06, |
|
"loss": 0.5132, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.25250890255746195, |
|
"grad_norm": 2.683941547894333, |
|
"learning_rate": 9.308305114140086e-06, |
|
"loss": 0.513, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2557461961799935, |
|
"grad_norm": 1.4991894757393012, |
|
"learning_rate": 9.279356010361647e-06, |
|
"loss": 0.5149, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2589834898025251, |
|
"grad_norm": 1.2507685206168797, |
|
"learning_rate": 9.249860414405794e-06, |
|
"loss": 0.4969, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26222078342505667, |
|
"grad_norm": 1.35022647512136, |
|
"learning_rate": 9.219822092987061e-06, |
|
"loss": 0.4938, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2654580770475882, |
|
"grad_norm": 1.3005194873163757, |
|
"learning_rate": 9.18924488212835e-06, |
|
"loss": 0.4941, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2686953706701198, |
|
"grad_norm": 1.2581885113440374, |
|
"learning_rate": 9.158132686671071e-06, |
|
"loss": 0.4959, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.27193266429265134, |
|
"grad_norm": 1.9420918227035224, |
|
"learning_rate": 9.126489479776461e-06, |
|
"loss": 0.5171, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2751699579151829, |
|
"grad_norm": 1.1789473175722394, |
|
"learning_rate": 9.09431930241821e-06, |
|
"loss": 0.5132, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27840725153771445, |
|
"grad_norm": 1.6644531651075887, |
|
"learning_rate": 9.061626262866403e-06, |
|
"loss": 0.521, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.281644545160246, |
|
"grad_norm": 1.3772893833242836, |
|
"learning_rate": 9.028414536162873e-06, |
|
"loss": 0.501, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2848818387827776, |
|
"grad_norm": 1.4605098649279153, |
|
"learning_rate": 8.994688363588035e-06, |
|
"loss": 0.5014, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2881191324053092, |
|
"grad_norm": 1.2543294792487787, |
|
"learning_rate": 8.960452052119259e-06, |
|
"loss": 0.4897, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.29135642602784073, |
|
"grad_norm": 1.4701554087797506, |
|
"learning_rate": 8.925709973880844e-06, |
|
"loss": 0.4974, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2945937196503723, |
|
"grad_norm": 1.2834425518492831, |
|
"learning_rate": 8.890466565585684e-06, |
|
"loss": 0.5074, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.29783101327290384, |
|
"grad_norm": 1.1087811452110212, |
|
"learning_rate": 8.854726327968675e-06, |
|
"loss": 0.5086, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3010683068954354, |
|
"grad_norm": 1.1192002738248805, |
|
"learning_rate": 8.818493825211962e-06, |
|
"loss": 0.5145, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.30430560051796696, |
|
"grad_norm": 1.2164084119048657, |
|
"learning_rate": 8.781773684362058e-06, |
|
"loss": 0.5154, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.30754289414049857, |
|
"grad_norm": 1.7852611552660387, |
|
"learning_rate": 8.744570594738965e-06, |
|
"loss": 0.5147, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3107801877630301, |
|
"grad_norm": 2.0895432300719112, |
|
"learning_rate": 8.706889307337322e-06, |
|
"loss": 0.5101, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3140174813855617, |
|
"grad_norm": 1.305634982599103, |
|
"learning_rate": 8.668734634219677e-06, |
|
"loss": 0.5204, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.31725477500809324, |
|
"grad_norm": 1.5110057953446319, |
|
"learning_rate": 8.630111447901974e-06, |
|
"loss": 0.4925, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3204920686306248, |
|
"grad_norm": 1.2113963088571518, |
|
"learning_rate": 8.59102468073131e-06, |
|
"loss": 0.5045, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.32372936225315635, |
|
"grad_norm": 1.1406356602000782, |
|
"learning_rate": 8.551479324256052e-06, |
|
"loss": 0.4903, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3269666558756879, |
|
"grad_norm": 1.2091711143518766, |
|
"learning_rate": 8.51148042858839e-06, |
|
"loss": 0.5018, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.33020394949821946, |
|
"grad_norm": 1.2863060350140245, |
|
"learning_rate": 8.471033101759426e-06, |
|
"loss": 0.4991, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3334412431207511, |
|
"grad_norm": 1.6631825587574212, |
|
"learning_rate": 8.430142509066848e-06, |
|
"loss": 0.5096, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.33667853674328263, |
|
"grad_norm": 1.2839071122317827, |
|
"learning_rate": 8.388813872415301e-06, |
|
"loss": 0.5036, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3399158303658142, |
|
"grad_norm": 1.4515170974118698, |
|
"learning_rate": 8.347052469649534e-06, |
|
"loss": 0.4917, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.34315312398834574, |
|
"grad_norm": 1.1940858318622096, |
|
"learning_rate": 8.304863633880385e-06, |
|
"loss": 0.4927, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3463904176108773, |
|
"grad_norm": 2.041119119310852, |
|
"learning_rate": 8.262252752803728e-06, |
|
"loss": 0.5021, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.34962771123340886, |
|
"grad_norm": 1.5598325467101413, |
|
"learning_rate": 8.21922526801244e-06, |
|
"loss": 0.4949, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3528650048559404, |
|
"grad_norm": 1.3210055095635682, |
|
"learning_rate": 8.175786674301486e-06, |
|
"loss": 0.4916, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.356102298478472, |
|
"grad_norm": 1.4444985236942383, |
|
"learning_rate": 8.131942518966205e-06, |
|
"loss": 0.515, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3593395921010036, |
|
"grad_norm": 2.096926087895547, |
|
"learning_rate": 8.087698401093904e-06, |
|
"loss": 0.5046, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.36257688572353514, |
|
"grad_norm": 1.8779799622755553, |
|
"learning_rate": 8.043059970848827e-06, |
|
"loss": 0.4925, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3658141793460667, |
|
"grad_norm": 1.1855292279659888, |
|
"learning_rate": 7.998032928750603e-06, |
|
"loss": 0.4915, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.36905147296859825, |
|
"grad_norm": 1.2561037766110636, |
|
"learning_rate": 7.95262302494627e-06, |
|
"loss": 0.4921, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3722887665911298, |
|
"grad_norm": 1.2308136471771238, |
|
"learning_rate": 7.906836058475947e-06, |
|
"loss": 0.4824, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.37552606021366136, |
|
"grad_norm": 1.4251854056491962, |
|
"learning_rate": 7.860677876532284e-06, |
|
"loss": 0.4998, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3787633538361929, |
|
"grad_norm": 2.348386527172616, |
|
"learning_rate": 7.814154373713746e-06, |
|
"loss": 0.4908, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.38200064745872453, |
|
"grad_norm": 1.865144066346986, |
|
"learning_rate": 7.76727149127184e-06, |
|
"loss": 0.4846, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3852379410812561, |
|
"grad_norm": 1.4007403629971613, |
|
"learning_rate": 7.720035216352398e-06, |
|
"loss": 0.4874, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.38847523470378764, |
|
"grad_norm": 1.1445479147677562, |
|
"learning_rate": 7.672451581230997e-06, |
|
"loss": 0.4957, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3917125283263192, |
|
"grad_norm": 1.4508487552186167, |
|
"learning_rate": 7.624526662542602e-06, |
|
"loss": 0.4733, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.39494982194885075, |
|
"grad_norm": 1.3478086965831404, |
|
"learning_rate": 7.57626658050556e-06, |
|
"loss": 0.47, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3981871155713823, |
|
"grad_norm": 1.9590592831165203, |
|
"learning_rate": 7.527677498140019e-06, |
|
"loss": 0.4722, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.40142440919391387, |
|
"grad_norm": 1.2643798071505419, |
|
"learning_rate": 7.4787656204808865e-06, |
|
"loss": 0.4917, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4046617028164455, |
|
"grad_norm": 1.1231557245992803, |
|
"learning_rate": 7.429537193785417e-06, |
|
"loss": 0.4672, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.40789899643897704, |
|
"grad_norm": 1.5059792239616556, |
|
"learning_rate": 7.379998504735534e-06, |
|
"loss": 0.485, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4111362900615086, |
|
"grad_norm": 1.3294673643320833, |
|
"learning_rate": 7.33015587963501e-06, |
|
"loss": 0.4683, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.41437358368404015, |
|
"grad_norm": 1.2458719277331098, |
|
"learning_rate": 7.280015683601549e-06, |
|
"loss": 0.4947, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.4176108773065717, |
|
"grad_norm": 1.5392092717085768, |
|
"learning_rate": 7.2295843197539506e-06, |
|
"loss": 0.4872, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.42084817092910326, |
|
"grad_norm": 1.312225721855963, |
|
"learning_rate": 7.178868228394389e-06, |
|
"loss": 0.465, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4240854645516348, |
|
"grad_norm": 1.2479239867456877, |
|
"learning_rate": 7.127873886185976e-06, |
|
"loss": 0.4854, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4273227581741664, |
|
"grad_norm": 2.0594970342805072, |
|
"learning_rate": 7.076607805325648e-06, |
|
"loss": 0.4957, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.430560051796698, |
|
"grad_norm": 1.3369762999688763, |
|
"learning_rate": 7.025076532712538e-06, |
|
"loss": 0.4926, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.43379734541922954, |
|
"grad_norm": 1.253628281059844, |
|
"learning_rate": 6.97328664911191e-06, |
|
"loss": 0.4827, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4370346390417611, |
|
"grad_norm": 1.5718283098388837, |
|
"learning_rate": 6.921244768314762e-06, |
|
"loss": 0.4688, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.44027193266429265, |
|
"grad_norm": 1.221169832657551, |
|
"learning_rate": 6.868957536293214e-06, |
|
"loss": 0.4756, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4435092262868242, |
|
"grad_norm": 1.7633030570206691, |
|
"learning_rate": 6.816431630351801e-06, |
|
"loss": 0.4782, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.44674651990935577, |
|
"grad_norm": 1.1388033599578846, |
|
"learning_rate": 6.763673758274738e-06, |
|
"loss": 0.4547, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4499838135318873, |
|
"grad_norm": 1.8583763069122066, |
|
"learning_rate": 6.7106906574693175e-06, |
|
"loss": 0.4677, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4532211071544189, |
|
"grad_norm": 2.6222452513802423, |
|
"learning_rate": 6.657489094105511e-06, |
|
"loss": 0.4663, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4564584007769505, |
|
"grad_norm": 1.2420631202470083, |
|
"learning_rate": 6.604075862251892e-06, |
|
"loss": 0.4756, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.45969569439948205, |
|
"grad_norm": 1.3711399168409348, |
|
"learning_rate": 6.55045778300802e-06, |
|
"loss": 0.4792, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4629329880220136, |
|
"grad_norm": 1.2747272442800757, |
|
"learning_rate": 6.496641703633339e-06, |
|
"loss": 0.4945, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.46617028164454516, |
|
"grad_norm": 1.9542903017844309, |
|
"learning_rate": 6.442634496672771e-06, |
|
"loss": 0.4782, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4694075752670767, |
|
"grad_norm": 1.2575700419582634, |
|
"learning_rate": 6.388443059079046e-06, |
|
"loss": 0.4946, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.47264486888960827, |
|
"grad_norm": 1.7374379373347664, |
|
"learning_rate": 6.334074311331938e-06, |
|
"loss": 0.4727, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.47588216251213983, |
|
"grad_norm": 1.4848465822905959, |
|
"learning_rate": 6.279535196554497e-06, |
|
"loss": 0.4771, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.47911945613467144, |
|
"grad_norm": 1.3014806293507155, |
|
"learning_rate": 6.224832679626371e-06, |
|
"loss": 0.4558, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.482356749757203, |
|
"grad_norm": 1.1234018842757745, |
|
"learning_rate": 6.169973746294367e-06, |
|
"loss": 0.4807, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.48559404337973455, |
|
"grad_norm": 1.2339190353189107, |
|
"learning_rate": 6.114965402280342e-06, |
|
"loss": 0.4764, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4888313370022661, |
|
"grad_norm": 1.1985465287370143, |
|
"learning_rate": 6.059814672386535e-06, |
|
"loss": 0.4653, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.49206863062479766, |
|
"grad_norm": 1.0962666033332409, |
|
"learning_rate": 6.0045285995984795e-06, |
|
"loss": 0.4758, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4953059242473292, |
|
"grad_norm": 1.4061424214222114, |
|
"learning_rate": 5.9491142441855755e-06, |
|
"loss": 0.4445, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4985432178698608, |
|
"grad_norm": 1.698961611178768, |
|
"learning_rate": 5.8935786827994705e-06, |
|
"loss": 0.4844, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5017805114923923, |
|
"grad_norm": 1.2773530358959824, |
|
"learning_rate": 5.83792900757033e-06, |
|
"loss": 0.4711, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5050178051149239, |
|
"grad_norm": 1.097531946277775, |
|
"learning_rate": 5.782172325201155e-06, |
|
"loss": 0.4587, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5082550987374554, |
|
"grad_norm": 1.3771950799998722, |
|
"learning_rate": 5.726315756060214e-06, |
|
"loss": 0.4833, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.511492392359987, |
|
"grad_norm": 1.2640070993851824, |
|
"learning_rate": 5.67036643327175e-06, |
|
"loss": 0.4698, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5147296859825187, |
|
"grad_norm": 1.4765324665224344, |
|
"learning_rate": 5.61433150180504e-06, |
|
"loss": 0.4792, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5179669796050502, |
|
"grad_norm": 1.270051397725334, |
|
"learning_rate": 5.558218117561966e-06, |
|
"loss": 0.4691, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5212042732275818, |
|
"grad_norm": 1.487262847973142, |
|
"learning_rate": 5.502033446463157e-06, |
|
"loss": 0.4549, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5244415668501133, |
|
"grad_norm": 1.0818684589754615, |
|
"learning_rate": 5.445784663532892e-06, |
|
"loss": 0.4444, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5276788604726449, |
|
"grad_norm": 1.096657186329996, |
|
"learning_rate": 5.389478951982795e-06, |
|
"loss": 0.4403, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5309161540951765, |
|
"grad_norm": 1.2316375993715067, |
|
"learning_rate": 5.3331235022945275e-06, |
|
"loss": 0.4633, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.534153447717708, |
|
"grad_norm": 1.336530587237014, |
|
"learning_rate": 5.276725511301522e-06, |
|
"loss": 0.4612, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5373907413402396, |
|
"grad_norm": 1.2767954358198574, |
|
"learning_rate": 5.2202921812699224e-06, |
|
"loss": 0.4797, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5406280349627711, |
|
"grad_norm": 1.2515640963616828, |
|
"learning_rate": 5.163830718978814e-06, |
|
"loss": 0.4426, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5438653285853027, |
|
"grad_norm": 1.4888354293436683, |
|
"learning_rate": 5.107348334799897e-06, |
|
"loss": 0.4429, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5471026222078342, |
|
"grad_norm": 1.1397480663693997, |
|
"learning_rate": 5.050852241776687e-06, |
|
"loss": 0.4806, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5503399158303658, |
|
"grad_norm": 1.383755335534758, |
|
"learning_rate": 4.994349654703374e-06, |
|
"loss": 0.4677, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5535772094528973, |
|
"grad_norm": 1.0399853616381194, |
|
"learning_rate": 4.93784778920347e-06, |
|
"loss": 0.4544, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5568145030754289, |
|
"grad_norm": 1.2261677975570098, |
|
"learning_rate": 4.881353860808337e-06, |
|
"loss": 0.4737, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5600517966979605, |
|
"grad_norm": 1.3450655153335658, |
|
"learning_rate": 4.824875084035736e-06, |
|
"loss": 0.45, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.563289090320492, |
|
"grad_norm": 1.7303162242759071, |
|
"learning_rate": 4.768418671468502e-06, |
|
"loss": 0.4487, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5665263839430237, |
|
"grad_norm": 1.2092323849987936, |
|
"learning_rate": 4.711991832833458e-06, |
|
"loss": 0.4496, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5697636775655552, |
|
"grad_norm": 1.1729698153300774, |
|
"learning_rate": 4.655601774080716e-06, |
|
"loss": 0.4412, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5730009711880868, |
|
"grad_norm": 1.8430576640984782, |
|
"learning_rate": 4.599255696463434e-06, |
|
"loss": 0.4794, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5762382648106184, |
|
"grad_norm": 1.2327240635393302, |
|
"learning_rate": 4.542960795618194e-06, |
|
"loss": 0.447, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5794755584331499, |
|
"grad_norm": 1.5385068868409182, |
|
"learning_rate": 4.486724260646083e-06, |
|
"loss": 0.4335, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5827128520556815, |
|
"grad_norm": 1.5225619416563714, |
|
"learning_rate": 4.430553273194609e-06, |
|
"loss": 0.4441, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.585950145678213, |
|
"grad_norm": 1.4895774823409547, |
|
"learning_rate": 4.3744550065405864e-06, |
|
"loss": 0.4841, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5891874393007446, |
|
"grad_norm": 1.5703613121834772, |
|
"learning_rate": 4.318436624674067e-06, |
|
"loss": 0.4463, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5924247329232761, |
|
"grad_norm": 1.1534441333167642, |
|
"learning_rate": 4.262505281383476e-06, |
|
"loss": 0.4506, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5956620265458077, |
|
"grad_norm": 1.2410143224549055, |
|
"learning_rate": 4.206668119342033e-06, |
|
"loss": 0.4382, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5988993201683392, |
|
"grad_norm": 1.944311167675598, |
|
"learning_rate": 4.15093226919561e-06, |
|
"loss": 0.4344, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6021366137908708, |
|
"grad_norm": 1.0718668211688862, |
|
"learning_rate": 4.0953048486521105e-06, |
|
"loss": 0.4537, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6053739074134024, |
|
"grad_norm": 1.3196234174557926, |
|
"learning_rate": 4.039792961572513e-06, |
|
"loss": 0.4349, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6086112010359339, |
|
"grad_norm": 1.4850011207756155, |
|
"learning_rate": 3.984403697063679e-06, |
|
"loss": 0.4465, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6118484946584655, |
|
"grad_norm": 1.1052614698421046, |
|
"learning_rate": 3.929144128573035e-06, |
|
"loss": 0.4404, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6150857882809971, |
|
"grad_norm": 1.2374254994171028, |
|
"learning_rate": 3.87402131298527e-06, |
|
"loss": 0.4342, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6183230819035287, |
|
"grad_norm": 1.1059894750691, |
|
"learning_rate": 3.819042289721139e-06, |
|
"loss": 0.4389, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6215603755260602, |
|
"grad_norm": 1.1534915703249282, |
|
"learning_rate": 3.764214079838496e-06, |
|
"loss": 0.4475, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6247976691485918, |
|
"grad_norm": 1.2567624750134727, |
|
"learning_rate": 3.7095436851356813e-06, |
|
"loss": 0.4251, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6280349627711234, |
|
"grad_norm": 1.3966399419605446, |
|
"learning_rate": 3.655038087257356e-06, |
|
"loss": 0.4152, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6312722563936549, |
|
"grad_norm": 2.325304921982081, |
|
"learning_rate": 3.6007042468029174e-06, |
|
"loss": 0.4353, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6345095500161865, |
|
"grad_norm": 1.8152216504321872, |
|
"learning_rate": 3.5465491024375983e-06, |
|
"loss": 0.4304, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.637746843638718, |
|
"grad_norm": 1.4667613587733686, |
|
"learning_rate": 3.4925795700063735e-06, |
|
"loss": 0.4482, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6409841372612496, |
|
"grad_norm": 1.5490376161227752, |
|
"learning_rate": 3.438802541650779e-06, |
|
"loss": 0.4544, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6442214308837811, |
|
"grad_norm": 1.073502513354859, |
|
"learning_rate": 3.3852248849287473e-06, |
|
"loss": 0.4502, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6474587245063127, |
|
"grad_norm": 1.4150494245382768, |
|
"learning_rate": 3.3318534419375962e-06, |
|
"loss": 0.4553, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6506960181288443, |
|
"grad_norm": 1.1873711967885243, |
|
"learning_rate": 3.278695028440265e-06, |
|
"loss": 0.4719, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6539333117513758, |
|
"grad_norm": 1.0277166813799339, |
|
"learning_rate": 3.2257564329949033e-06, |
|
"loss": 0.446, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6571706053739074, |
|
"grad_norm": 1.6303778205817352, |
|
"learning_rate": 3.1730444160879603e-06, |
|
"loss": 0.4514, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6604078989964389, |
|
"grad_norm": 1.957827164457282, |
|
"learning_rate": 3.120565709270822e-06, |
|
"loss": 0.4364, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6636451926189706, |
|
"grad_norm": 1.1300424316339148, |
|
"learning_rate": 3.0683270143001744e-06, |
|
"loss": 0.4573, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6668824862415021, |
|
"grad_norm": 1.3350517057914033, |
|
"learning_rate": 3.01633500228216e-06, |
|
"loss": 0.4395, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6701197798640337, |
|
"grad_norm": 2.4966191319455073, |
|
"learning_rate": 2.9645963128204426e-06, |
|
"loss": 0.4562, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6733570734865653, |
|
"grad_norm": 1.162281286522959, |
|
"learning_rate": 2.91311755316831e-06, |
|
"loss": 0.4415, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6765943671090968, |
|
"grad_norm": 1.2771987132753222, |
|
"learning_rate": 2.8619052973848936e-06, |
|
"loss": 0.4513, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6798316607316284, |
|
"grad_norm": 1.558113731713831, |
|
"learning_rate": 2.8109660854956324e-06, |
|
"loss": 0.4409, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6830689543541599, |
|
"grad_norm": 2.1857614420408398, |
|
"learning_rate": 2.760306422657083e-06, |
|
"loss": 0.428, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6863062479766915, |
|
"grad_norm": 1.4005789104812565, |
|
"learning_rate": 2.7099327783261905e-06, |
|
"loss": 0.4171, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.689543541599223, |
|
"grad_norm": 1.3198113477885867, |
|
"learning_rate": 2.6598515854341046e-06, |
|
"loss": 0.4222, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6927808352217546, |
|
"grad_norm": 1.552097021806336, |
|
"learning_rate": 2.610069239564663e-06, |
|
"loss": 0.4374, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6960181288442862, |
|
"grad_norm": 1.5875723544475318, |
|
"learning_rate": 2.5605920981376607e-06, |
|
"loss": 0.4329, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6992554224668177, |
|
"grad_norm": 1.7408409217582645, |
|
"learning_rate": 2.5114264795969658e-06, |
|
"loss": 0.4255, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7024927160893493, |
|
"grad_norm": 1.4137585560938575, |
|
"learning_rate": 2.4625786626036315e-06, |
|
"loss": 0.4553, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7057300097118808, |
|
"grad_norm": 1.2013119499594673, |
|
"learning_rate": 2.4140548852340924e-06, |
|
"loss": 0.4272, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7089673033344124, |
|
"grad_norm": 1.554021107843274, |
|
"learning_rate": 2.3658613441835275e-06, |
|
"loss": 0.4148, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.712204596956944, |
|
"grad_norm": 1.0745275989784961, |
|
"learning_rate": 2.318004193974513e-06, |
|
"loss": 0.4489, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7154418905794756, |
|
"grad_norm": 1.224435407580586, |
|
"learning_rate": 2.2704895461710673e-06, |
|
"loss": 0.4335, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7186791842020072, |
|
"grad_norm": 1.4400592081825279, |
|
"learning_rate": 2.2233234685981814e-06, |
|
"loss": 0.438, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7219164778245387, |
|
"grad_norm": 1.1095915569574144, |
|
"learning_rate": 2.1765119845669273e-06, |
|
"loss": 0.4193, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7251537714470703, |
|
"grad_norm": 1.1009788319616933, |
|
"learning_rate": 2.130061072105252e-06, |
|
"loss": 0.4167, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7283910650696018, |
|
"grad_norm": 1.0172586653465259, |
|
"learning_rate": 2.083976663194567e-06, |
|
"loss": 0.4592, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7316283586921334, |
|
"grad_norm": 1.1509992611813928, |
|
"learning_rate": 2.0382646430121962e-06, |
|
"loss": 0.4288, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7348656523146649, |
|
"grad_norm": 1.253268879076886, |
|
"learning_rate": 1.992930849179827e-06, |
|
"loss": 0.4255, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7381029459371965, |
|
"grad_norm": 1.4917991891879472, |
|
"learning_rate": 1.9479810710180124e-06, |
|
"loss": 0.4259, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.741340239559728, |
|
"grad_norm": 1.3293852268645232, |
|
"learning_rate": 1.9034210488068505e-06, |
|
"loss": 0.4258, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7445775331822596, |
|
"grad_norm": 1.955770547143747, |
|
"learning_rate": 1.8592564730529268e-06, |
|
"loss": 0.4331, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7478148268047912, |
|
"grad_norm": 4.285906632966228, |
|
"learning_rate": 1.815492983762614e-06, |
|
"loss": 0.41, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7510521204273227, |
|
"grad_norm": 1.3399086538893303, |
|
"learning_rate": 1.7721361697218192e-06, |
|
"loss": 0.431, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7542894140498543, |
|
"grad_norm": 1.7551709481473476, |
|
"learning_rate": 1.7291915677822668e-06, |
|
"loss": 0.4345, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7575267076723858, |
|
"grad_norm": 1.2402942552904213, |
|
"learning_rate": 1.6866646621544213e-06, |
|
"loss": 0.4412, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7607640012949175, |
|
"grad_norm": 1.454337649781767, |
|
"learning_rate": 1.6445608837071363e-06, |
|
"loss": 0.4372, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7640012949174491, |
|
"grad_norm": 1.3605659792377778, |
|
"learning_rate": 1.6028856092740975e-06, |
|
"loss": 0.4275, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7672385885399806, |
|
"grad_norm": 1.6891447458348832, |
|
"learning_rate": 1.5616441609671868e-06, |
|
"loss": 0.4345, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7704758821625122, |
|
"grad_norm": 1.36054087661354, |
|
"learning_rate": 1.5208418054968255e-06, |
|
"loss": 0.4303, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7737131757850437, |
|
"grad_norm": 2.1532676084820004, |
|
"learning_rate": 1.4804837534993855e-06, |
|
"loss": 0.416, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7769504694075753, |
|
"grad_norm": 1.0925060282525623, |
|
"learning_rate": 1.4405751588717743e-06, |
|
"loss": 0.4132, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7801877630301068, |
|
"grad_norm": 1.8326582821797257, |
|
"learning_rate": 1.4011211181132612e-06, |
|
"loss": 0.4367, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7834250566526384, |
|
"grad_norm": 1.3611403654592065, |
|
"learning_rate": 1.3621266696746305e-06, |
|
"loss": 0.4268, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.78666235027517, |
|
"grad_norm": 1.5691241422304574, |
|
"learning_rate": 1.3235967933147482e-06, |
|
"loss": 0.4155, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7898996438977015, |
|
"grad_norm": 1.1610560668530532, |
|
"learning_rate": 1.2855364094646239e-06, |
|
"loss": 0.4081, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7931369375202331, |
|
"grad_norm": 1.3174765562689756, |
|
"learning_rate": 1.24795037859906e-06, |
|
"loss": 0.4276, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7963742311427646, |
|
"grad_norm": 1.5724114202346193, |
|
"learning_rate": 1.2108435006159352e-06, |
|
"loss": 0.4266, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7996115247652962, |
|
"grad_norm": 2.5186489719168477, |
|
"learning_rate": 1.1742205142232472e-06, |
|
"loss": 0.4272, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8028488183878277, |
|
"grad_norm": 1.2269224379818156, |
|
"learning_rate": 1.1380860963339551e-06, |
|
"loss": 0.4125, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8060861120103593, |
|
"grad_norm": 1.6275541333455594, |
|
"learning_rate": 1.1024448614687154e-06, |
|
"loss": 0.432, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.809323405632891, |
|
"grad_norm": 1.6966584659345751, |
|
"learning_rate": 1.0673013611665912e-06, |
|
"loss": 0.4408, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8125606992554225, |
|
"grad_norm": 2.3214553433226452, |
|
"learning_rate": 1.0326600834038003e-06, |
|
"loss": 0.4341, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8157979928779541, |
|
"grad_norm": 1.7966203592073668, |
|
"learning_rate": 9.985254520205827e-07, |
|
"loss": 0.4381, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8190352865004856, |
|
"grad_norm": 1.558277834279681, |
|
"learning_rate": 9.649018261562515e-07, |
|
"loss": 0.4273, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8222725801230172, |
|
"grad_norm": 1.4720652884485796, |
|
"learning_rate": 9.31793499692521e-07, |
|
"loss": 0.403, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8255098737455487, |
|
"grad_norm": 1.2990539813318709, |
|
"learning_rate": 8.992047007051502e-07, |
|
"loss": 0.411, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8287471673680803, |
|
"grad_norm": 1.588973123293107, |
|
"learning_rate": 8.671395909240054e-07, |
|
"loss": 0.4426, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8319844609906119, |
|
"grad_norm": 1.5375275679186977, |
|
"learning_rate": 8.356022652015888e-07, |
|
"loss": 0.4356, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8352217546131434, |
|
"grad_norm": 2.0458866674407856, |
|
"learning_rate": 8.04596750990107e-07, |
|
"loss": 0.4332, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.838459048235675, |
|
"grad_norm": 1.3170208953439513, |
|
"learning_rate": 7.741270078271473e-07, |
|
"loss": 0.4298, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.8416963418582065, |
|
"grad_norm": 1.6800797824988918, |
|
"learning_rate": 7.441969268300264e-07, |
|
"loss": 0.4308, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8449336354807381, |
|
"grad_norm": 1.4007822337788043, |
|
"learning_rate": 7.148103301988846e-07, |
|
"loss": 0.4369, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8481709291032696, |
|
"grad_norm": 1.6939881245355604, |
|
"learning_rate": 6.859709707285683e-07, |
|
"loss": 0.393, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8514082227258012, |
|
"grad_norm": 1.2641245106504042, |
|
"learning_rate": 6.576825313293811e-07, |
|
"loss": 0.4156, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8546455163483327, |
|
"grad_norm": 1.0467779769347487, |
|
"learning_rate": 6.299486245567677e-07, |
|
"loss": 0.4187, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8578828099708644, |
|
"grad_norm": 1.3379069751937747, |
|
"learning_rate": 6.027727921499654e-07, |
|
"loss": 0.4391, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.861120103593396, |
|
"grad_norm": 1.2248200595811436, |
|
"learning_rate": 5.76158504579713e-07, |
|
"loss": 0.4062, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8643573972159275, |
|
"grad_norm": 1.855147034712074, |
|
"learning_rate": 5.501091606050646e-07, |
|
"loss": 0.3993, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8675946908384591, |
|
"grad_norm": 1.0581073840185136, |
|
"learning_rate": 5.246280868393389e-07, |
|
"loss": 0.4208, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8708319844609906, |
|
"grad_norm": 1.8680186317258003, |
|
"learning_rate": 4.997185373253038e-07, |
|
"loss": 0.4205, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8740692780835222, |
|
"grad_norm": 1.1556112226095094, |
|
"learning_rate": 4.7538369311962595e-07, |
|
"loss": 0.436, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8773065717060538, |
|
"grad_norm": 1.2396365189028185, |
|
"learning_rate": 4.5162666188662553e-07, |
|
"loss": 0.4054, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8805438653285853, |
|
"grad_norm": 1.8606848711011432, |
|
"learning_rate": 4.2845047750142364e-07, |
|
"loss": 0.4381, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8837811589511169, |
|
"grad_norm": 1.2713949286294475, |
|
"learning_rate": 4.058580996624961e-07, |
|
"loss": 0.4261, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8870184525736484, |
|
"grad_norm": 1.169673887026325, |
|
"learning_rate": 3.838524135137145e-07, |
|
"loss": 0.4314, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.89025574619618, |
|
"grad_norm": 1.4698696332980015, |
|
"learning_rate": 3.624362292758932e-07, |
|
"loss": 0.4261, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8934930398187115, |
|
"grad_norm": 1.117701112285988, |
|
"learning_rate": 3.416122818879164e-07, |
|
"loss": 0.414, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8967303334412431, |
|
"grad_norm": 1.8690202090261214, |
|
"learning_rate": 3.2138323065747767e-07, |
|
"loss": 0.4233, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8999676270637746, |
|
"grad_norm": 1.9444565939145249, |
|
"learning_rate": 3.0175165892146696e-07, |
|
"loss": 0.4314, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.9032049206863062, |
|
"grad_norm": 1.7268620572069362, |
|
"learning_rate": 2.8272007371607237e-07, |
|
"loss": 0.4092, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.9064422143088378, |
|
"grad_norm": 1.3013418095252707, |
|
"learning_rate": 2.642909054566234e-07, |
|
"loss": 0.4151, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9096795079313694, |
|
"grad_norm": 1.0436149624026436, |
|
"learning_rate": 2.4646650762720936e-07, |
|
"loss": 0.406, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.912916801553901, |
|
"grad_norm": 1.1874840383962737, |
|
"learning_rate": 2.2924915648013578e-07, |
|
"loss": 0.4279, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.9161540951764325, |
|
"grad_norm": 1.9059543067602664, |
|
"learning_rate": 2.1264105074523367e-07, |
|
"loss": 0.4337, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.9193913887989641, |
|
"grad_norm": 1.4884966997206766, |
|
"learning_rate": 1.966443113490729e-07, |
|
"loss": 0.412, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.9226286824214956, |
|
"grad_norm": 1.3718372730811397, |
|
"learning_rate": 1.8126098114411073e-07, |
|
"loss": 0.422, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9258659760440272, |
|
"grad_norm": 1.963050281617195, |
|
"learning_rate": 1.6649302464781304e-07, |
|
"loss": 0.4247, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9291032696665588, |
|
"grad_norm": 1.3124207972289312, |
|
"learning_rate": 1.523423277917735e-07, |
|
"loss": 0.4284, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9323405632890903, |
|
"grad_norm": 1.033110356628065, |
|
"learning_rate": 1.3881069768087285e-07, |
|
"loss": 0.4147, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9355778569116219, |
|
"grad_norm": 1.298106011447868, |
|
"learning_rate": 1.258998623625063e-07, |
|
"loss": 0.4221, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9388151505341534, |
|
"grad_norm": 1.1556899802309393, |
|
"learning_rate": 1.1361147060590161e-07, |
|
"loss": 0.4084, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.942052444156685, |
|
"grad_norm": 1.4391398820149846, |
|
"learning_rate": 1.0194709169156491e-07, |
|
"loss": 0.4158, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9452897377792165, |
|
"grad_norm": 1.2127363073000252, |
|
"learning_rate": 9.090821521087811e-08, |
|
"loss": 0.4157, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9485270314017481, |
|
"grad_norm": 1.38644410758264, |
|
"learning_rate": 8.049625087587054e-08, |
|
"loss": 0.4288, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9517643250242797, |
|
"grad_norm": 1.904590572303641, |
|
"learning_rate": 7.071252833919184e-08, |
|
"loss": 0.4213, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9550016186468112, |
|
"grad_norm": 1.7465451060405173, |
|
"learning_rate": 6.15582970243117e-08, |
|
"loss": 0.4337, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9582389122693429, |
|
"grad_norm": 1.933526856777722, |
|
"learning_rate": 5.3034725965960264e-08, |
|
"loss": 0.4172, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9614762058918744, |
|
"grad_norm": 1.7005469732529432, |
|
"learning_rate": 4.514290366084195e-08, |
|
"loss": 0.4198, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.964713499514406, |
|
"grad_norm": 1.0515454431201907, |
|
"learning_rate": 3.788383792862393e-08, |
|
"loss": 0.4466, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9679507931369375, |
|
"grad_norm": 1.454755463123962, |
|
"learning_rate": 3.125845578323739e-08, |
|
"loss": 0.4146, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9711880867594691, |
|
"grad_norm": 1.3458009194327718, |
|
"learning_rate": 2.5267603314493848e-08, |
|
"loss": 0.3876, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9744253803820007, |
|
"grad_norm": 1.140273366577025, |
|
"learning_rate": 1.991204558003168e-08, |
|
"loss": 0.4236, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9776626740045322, |
|
"grad_norm": 2.5414368780936645, |
|
"learning_rate": 1.5192466507619742e-08, |
|
"loss": 0.4246, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9808999676270638, |
|
"grad_norm": 1.7691943119432856, |
|
"learning_rate": 1.110946880781616e-08, |
|
"loss": 0.4249, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9841372612495953, |
|
"grad_norm": 1.8831840560064748, |
|
"learning_rate": 7.663573896996568e-09, |
|
"loss": 0.4331, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9873745548721269, |
|
"grad_norm": 1.2855991887240172, |
|
"learning_rate": 4.855221830768475e-09, |
|
"loss": 0.4004, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9906118484946584, |
|
"grad_norm": 1.0300289207398134, |
|
"learning_rate": 2.684771247776774e-09, |
|
"loss": 0.4518, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.99384914211719, |
|
"grad_norm": 1.7128696766597806, |
|
"learning_rate": 1.1524993239003801e-09, |
|
"loss": 0.4115, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9970864357397216, |
|
"grad_norm": 1.9683937477037325, |
|
"learning_rate": 2.5860173685721134e-10, |
|
"loss": 0.4285, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3089, |
|
"total_flos": 3417304969773056.0, |
|
"train_loss": 0.47494275417865073, |
|
"train_runtime": 147771.5827, |
|
"train_samples_per_second": 2.675, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3089, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3417304969773056.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|