{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9985842378480414, |
|
"eval_steps": 500, |
|
"global_step": 3177, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009438414346389807, |
|
"grad_norm": 0.7047261682571029, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.3368, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018876828692779613, |
|
"grad_norm": 0.44565794909772116, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.2696, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.028315243039169418, |
|
"grad_norm": 0.2469001773100438, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.2345, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.037753657385559226, |
|
"grad_norm": 0.17504888016598083, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.213, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04719207173194903, |
|
"grad_norm": 0.14319985578848382, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.194, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.056630486078338836, |
|
"grad_norm": 0.1594795618687463, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1871, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06606890042472864, |
|
"grad_norm": 0.14625288121586646, |
|
"learning_rate": 1.4583333333333333e-05, |
|
"loss": 0.1791, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07550731477111845, |
|
"grad_norm": 0.44536813060583275, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1777, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08494572911750826, |
|
"grad_norm": 0.17858201185676476, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.1743, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09438414346389806, |
|
"grad_norm": 0.22679600482590775, |
|
"learning_rate": 1.9999916822524766e-05, |
|
"loss": 0.173, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10382255781028787, |
|
"grad_norm": 0.2245846959065181, |
|
"learning_rate": 1.999898109181919e-05, |
|
"loss": 0.1697, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11326097215667767, |
|
"grad_norm": 0.21274961909318216, |
|
"learning_rate": 1.9997005756177228e-05, |
|
"loss": 0.1669, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.12269938650306748, |
|
"grad_norm": 0.15535944242468744, |
|
"learning_rate": 1.999399102097668e-05, |
|
"loss": 0.1664, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.13213780084945728, |
|
"grad_norm": 0.1677351780597522, |
|
"learning_rate": 1.9989937199662845e-05, |
|
"loss": 0.1652, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1415762151958471, |
|
"grad_norm": 0.15664753634679404, |
|
"learning_rate": 1.998484471371593e-05, |
|
"loss": 0.1619, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1510146295422369, |
|
"grad_norm": 0.1936710831336268, |
|
"learning_rate": 1.9978714092607234e-05, |
|
"loss": 0.1606, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.16045304388862672, |
|
"grad_norm": 0.21595388620143963, |
|
"learning_rate": 1.9971545973744102e-05, |
|
"loss": 0.16, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16989145823501653, |
|
"grad_norm": 0.19084712938417736, |
|
"learning_rate": 1.9963341102403652e-05, |
|
"loss": 0.1582, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1793298725814063, |
|
"grad_norm": 0.19755743286804991, |
|
"learning_rate": 1.9954100331655265e-05, |
|
"loss": 0.1551, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18876828692779613, |
|
"grad_norm": 0.21785901423412252, |
|
"learning_rate": 1.9943824622271934e-05, |
|
"loss": 0.1559, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19820670127418594, |
|
"grad_norm": 0.1472275149396309, |
|
"learning_rate": 1.9932515042630335e-05, |
|
"loss": 0.1534, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20764511562057575, |
|
"grad_norm": 0.14184865805893884, |
|
"learning_rate": 1.9920172768599763e-05, |
|
"loss": 0.1545, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.21708352996696556, |
|
"grad_norm": 0.1833363522186809, |
|
"learning_rate": 1.9906799083419865e-05, |
|
"loss": 0.1543, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.22652194431335534, |
|
"grad_norm": 0.141212536462394, |
|
"learning_rate": 1.989239537756723e-05, |
|
"loss": 0.1538, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.23596035865974516, |
|
"grad_norm": 0.1556507383209179, |
|
"learning_rate": 1.987696314861082e-05, |
|
"loss": 0.1541, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.24539877300613497, |
|
"grad_norm": 0.17173283637998002, |
|
"learning_rate": 1.986050400105626e-05, |
|
"loss": 0.1518, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.25483718735252475, |
|
"grad_norm": 0.150629748875022, |
|
"learning_rate": 1.9843019646179014e-05, |
|
"loss": 0.1501, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.26427560169891456, |
|
"grad_norm": 0.19334006281958221, |
|
"learning_rate": 1.9824511901846475e-05, |
|
"loss": 0.1483, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2737140160453044, |
|
"grad_norm": 0.12527664802237196, |
|
"learning_rate": 1.9804982692328944e-05, |
|
"loss": 0.1514, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2831524303916942, |
|
"grad_norm": 0.19858199254198736, |
|
"learning_rate": 1.9784434048099565e-05, |
|
"loss": 0.151, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.292590844738084, |
|
"grad_norm": 0.1677799134496006, |
|
"learning_rate": 1.976286810562323e-05, |
|
"loss": 0.1498, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3020292590844738, |
|
"grad_norm": 0.15743349400071904, |
|
"learning_rate": 1.9740287107134417e-05, |
|
"loss": 0.1513, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3114676734308636, |
|
"grad_norm": 0.1616061413079364, |
|
"learning_rate": 1.97166934004041e-05, |
|
"loss": 0.1489, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.32090608777725343, |
|
"grad_norm": 0.1878283137943562, |
|
"learning_rate": 1.9692089438495622e-05, |
|
"loss": 0.1449, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.33034450212364325, |
|
"grad_norm": 0.16166637953640253, |
|
"learning_rate": 1.9666477779509655e-05, |
|
"loss": 0.1469, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.33978291647003306, |
|
"grad_norm": 0.12292532976422958, |
|
"learning_rate": 1.963986108631823e-05, |
|
"loss": 0.1468, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3492213308164228, |
|
"grad_norm": 0.16469874208354635, |
|
"learning_rate": 1.9612242126287876e-05, |
|
"loss": 0.1483, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3586597451628126, |
|
"grad_norm": 0.13369950841636608, |
|
"learning_rate": 1.958362377099191e-05, |
|
"loss": 0.1443, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.36809815950920244, |
|
"grad_norm": 0.12551292002845085, |
|
"learning_rate": 1.9554008995911837e-05, |
|
"loss": 0.1463, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.37753657385559225, |
|
"grad_norm": 0.14851921568961518, |
|
"learning_rate": 1.9523400880128032e-05, |
|
"loss": 0.1471, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38697498820198206, |
|
"grad_norm": 0.13162403368451694, |
|
"learning_rate": 1.949180260599957e-05, |
|
"loss": 0.1452, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3964134025483719, |
|
"grad_norm": 0.12724887401811377, |
|
"learning_rate": 1.945921745883337e-05, |
|
"loss": 0.1455, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4058518168947617, |
|
"grad_norm": 0.11779906396951238, |
|
"learning_rate": 1.9425648826542618e-05, |
|
"loss": 0.1435, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4152902312411515, |
|
"grad_norm": 0.1610933457769652, |
|
"learning_rate": 1.939110019929451e-05, |
|
"loss": 0.1436, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4247286455875413, |
|
"grad_norm": 0.12250958203512534, |
|
"learning_rate": 1.935557516914739e-05, |
|
"loss": 0.1451, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4341670599339311, |
|
"grad_norm": 0.1380572524992858, |
|
"learning_rate": 1.931907742967727e-05, |
|
"loss": 0.1444, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.44360547428032093, |
|
"grad_norm": 0.13646993698111895, |
|
"learning_rate": 1.92816107755938e-05, |
|
"loss": 0.142, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4530438886267107, |
|
"grad_norm": 0.11765542306036501, |
|
"learning_rate": 1.9243179102345753e-05, |
|
"loss": 0.1406, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4624823029731005, |
|
"grad_norm": 0.1266567901893174, |
|
"learning_rate": 1.9203786405715984e-05, |
|
"loss": 0.144, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4719207173194903, |
|
"grad_norm": 0.1113634311573256, |
|
"learning_rate": 1.9163436781405992e-05, |
|
"loss": 0.1428, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4813591316658801, |
|
"grad_norm": 0.13808836428511967, |
|
"learning_rate": 1.912213442461009e-05, |
|
"loss": 0.1399, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.49079754601226994, |
|
"grad_norm": 0.1226613837593307, |
|
"learning_rate": 1.9079883629579224e-05, |
|
"loss": 0.1396, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5002359603586597, |
|
"grad_norm": 0.14272835200919645, |
|
"learning_rate": 1.9036688789174496e-05, |
|
"loss": 0.1403, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5096743747050495, |
|
"grad_norm": 0.12981510040553715, |
|
"learning_rate": 1.899255439441043e-05, |
|
"loss": 0.1399, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5191127890514393, |
|
"grad_norm": 0.1190871345092575, |
|
"learning_rate": 1.8947485033988034e-05, |
|
"loss": 0.1376, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5285512033978291, |
|
"grad_norm": 0.1271477738963388, |
|
"learning_rate": 1.8901485393817724e-05, |
|
"loss": 0.1415, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5379896177442189, |
|
"grad_norm": 0.12965211048846748, |
|
"learning_rate": 1.8854560256532098e-05, |
|
"loss": 0.1423, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5474280320906088, |
|
"grad_norm": 0.13373262160455968, |
|
"learning_rate": 1.880671450098871e-05, |
|
"loss": 0.139, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5568664464369986, |
|
"grad_norm": 0.1322939697550499, |
|
"learning_rate": 1.8757953101762786e-05, |
|
"loss": 0.1396, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5663048607833884, |
|
"grad_norm": 0.11918437239832326, |
|
"learning_rate": 1.8708281128630023e-05, |
|
"loss": 0.138, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5757432751297782, |
|
"grad_norm": 0.12338738357381479, |
|
"learning_rate": 1.865770374603948e-05, |
|
"loss": 0.1406, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.585181689476168, |
|
"grad_norm": 0.11573754594906395, |
|
"learning_rate": 1.8606226212576612e-05, |
|
"loss": 0.138, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5946201038225578, |
|
"grad_norm": 0.1419588706141848, |
|
"learning_rate": 1.8553853880416555e-05, |
|
"loss": 0.1408, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6040585181689476, |
|
"grad_norm": 0.13998266637185536, |
|
"learning_rate": 1.8500592194767625e-05, |
|
"loss": 0.1394, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6134969325153374, |
|
"grad_norm": 0.11868995175822014, |
|
"learning_rate": 1.8446446693305194e-05, |
|
"loss": 0.1384, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6229353468617272, |
|
"grad_norm": 0.1328472026088287, |
|
"learning_rate": 1.8391423005595928e-05, |
|
"loss": 0.1393, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6323737612081171, |
|
"grad_norm": 0.11726921800593894, |
|
"learning_rate": 1.833552685251246e-05, |
|
"loss": 0.1398, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6418121755545069, |
|
"grad_norm": 0.11466260187649016, |
|
"learning_rate": 1.827876404563861e-05, |
|
"loss": 0.1369, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6512505899008967, |
|
"grad_norm": 0.11234281014514101, |
|
"learning_rate": 1.8221140486665125e-05, |
|
"loss": 0.1346, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6606890042472865, |
|
"grad_norm": 0.11159741277810285, |
|
"learning_rate": 1.8162662166776085e-05, |
|
"loss": 0.1357, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6701274185936763, |
|
"grad_norm": 0.12752868267859116, |
|
"learning_rate": 1.8103335166026002e-05, |
|
"loss": 0.1389, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6795658329400661, |
|
"grad_norm": 0.12084535348559353, |
|
"learning_rate": 1.804316565270765e-05, |
|
"loss": 0.1375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6890042472864559, |
|
"grad_norm": 0.12102077085461252, |
|
"learning_rate": 1.798215988271075e-05, |
|
"loss": 0.1364, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6984426616328456, |
|
"grad_norm": 0.11713742692774234, |
|
"learning_rate": 1.7920324198871546e-05, |
|
"loss": 0.138, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7078810759792354, |
|
"grad_norm": 0.11656355822805255, |
|
"learning_rate": 1.785766503031332e-05, |
|
"loss": 0.1346, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7173194903256253, |
|
"grad_norm": 0.11377328844943654, |
|
"learning_rate": 1.7794188891777964e-05, |
|
"loss": 0.1352, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7267579046720151, |
|
"grad_norm": 0.12103799646507679, |
|
"learning_rate": 1.7729902382948617e-05, |
|
"loss": 0.1353, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7361963190184049, |
|
"grad_norm": 0.1073585292390918, |
|
"learning_rate": 1.76648121877635e-05, |
|
"loss": 0.1352, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7456347333647947, |
|
"grad_norm": 0.11214075940260533, |
|
"learning_rate": 1.759892507372099e-05, |
|
"loss": 0.1341, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7550731477111845, |
|
"grad_norm": 0.11706899066793994, |
|
"learning_rate": 1.7532247891175968e-05, |
|
"loss": 0.1333, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7645115620575743, |
|
"grad_norm": 0.11789888505768062, |
|
"learning_rate": 1.746478757262761e-05, |
|
"loss": 0.136, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7739499764039641, |
|
"grad_norm": 0.11237848535926774, |
|
"learning_rate": 1.739655113199858e-05, |
|
"loss": 0.1336, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7833883907503539, |
|
"grad_norm": 0.10753154987431834, |
|
"learning_rate": 1.7327545663905813e-05, |
|
"loss": 0.1331, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7928268050967437, |
|
"grad_norm": 0.1441522552747225, |
|
"learning_rate": 1.7257778342922853e-05, |
|
"loss": 0.1328, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8022652194431336, |
|
"grad_norm": 0.1269707942863234, |
|
"learning_rate": 1.7187256422833928e-05, |
|
"loss": 0.1319, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8117036337895234, |
|
"grad_norm": 0.11091236494221275, |
|
"learning_rate": 1.711598723587975e-05, |
|
"loss": 0.1324, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8211420481359132, |
|
"grad_norm": 0.10854265306012167, |
|
"learning_rate": 1.7043978191995177e-05, |
|
"loss": 0.1325, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.830580462482303, |
|
"grad_norm": 0.1110467712060928, |
|
"learning_rate": 1.6971236778038806e-05, |
|
"loss": 0.1315, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8400188768286928, |
|
"grad_norm": 0.12129611756408008, |
|
"learning_rate": 1.6897770557014535e-05, |
|
"loss": 0.1328, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8494572911750826, |
|
"grad_norm": 0.106781748696916, |
|
"learning_rate": 1.682358716728525e-05, |
|
"loss": 0.1351, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8588957055214724, |
|
"grad_norm": 0.11020118439519076, |
|
"learning_rate": 1.674869432177864e-05, |
|
"loss": 0.1325, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8683341198678622, |
|
"grad_norm": 0.11750342557908768, |
|
"learning_rate": 1.667309980718529e-05, |
|
"loss": 0.1312, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.877772534214252, |
|
"grad_norm": 0.12853148875033116, |
|
"learning_rate": 1.6596811483149077e-05, |
|
"loss": 0.1317, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8872109485606419, |
|
"grad_norm": 0.11393786746070304, |
|
"learning_rate": 1.651983728145e-05, |
|
"loss": 0.1355, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8966493629070316, |
|
"grad_norm": 0.11013100846033319, |
|
"learning_rate": 1.6442185205179507e-05, |
|
"loss": 0.1309, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9060877772534214, |
|
"grad_norm": 0.10641286141618259, |
|
"learning_rate": 1.6363863327908405e-05, |
|
"loss": 0.1339, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9155261915998112, |
|
"grad_norm": 0.11308702339858176, |
|
"learning_rate": 1.6284879792847433e-05, |
|
"loss": 0.1299, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.924964605946201, |
|
"grad_norm": 0.11315250527471539, |
|
"learning_rate": 1.620524281200062e-05, |
|
"loss": 0.1305, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9344030202925908, |
|
"grad_norm": 0.0972875949252274, |
|
"learning_rate": 1.6124960665311447e-05, |
|
"loss": 0.1322, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9438414346389806, |
|
"grad_norm": 0.10070713866086979, |
|
"learning_rate": 1.6044041699802005e-05, |
|
"loss": 0.129, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9532798489853704, |
|
"grad_norm": 0.11109074990403733, |
|
"learning_rate": 1.5962494328705123e-05, |
|
"loss": 0.1321, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9627182633317602, |
|
"grad_norm": 0.1199186598391774, |
|
"learning_rate": 1.588032703058964e-05, |
|
"loss": 0.1334, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9721566776781501, |
|
"grad_norm": 0.10777396066893469, |
|
"learning_rate": 1.5797548348478893e-05, |
|
"loss": 0.1325, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.9815950920245399, |
|
"grad_norm": 0.11999882098060052, |
|
"learning_rate": 1.571416688896246e-05, |
|
"loss": 0.132, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9910335063709297, |
|
"grad_norm": 0.10911342083469809, |
|
"learning_rate": 1.563019132130136e-05, |
|
"loss": 0.1301, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.0004719207173194, |
|
"grad_norm": 0.11482143235010223, |
|
"learning_rate": 1.5545630376526665e-05, |
|
"loss": 0.1282, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.0099103350637093, |
|
"grad_norm": 0.11699392564682201, |
|
"learning_rate": 1.5460492846531748e-05, |
|
"loss": 0.1142, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.019348749410099, |
|
"grad_norm": 0.09530170230868218, |
|
"learning_rate": 1.5374787583158188e-05, |
|
"loss": 0.1157, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.028787163756489, |
|
"grad_norm": 0.09486822390799159, |
|
"learning_rate": 1.5288523497275392e-05, |
|
"loss": 0.1143, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.0382255781028786, |
|
"grad_norm": 0.09531194088354993, |
|
"learning_rate": 1.5201709557854178e-05, |
|
"loss": 0.1128, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0476639924492686, |
|
"grad_norm": 0.11206649112299381, |
|
"learning_rate": 1.5114354791034225e-05, |
|
"loss": 0.1161, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.0571024067956583, |
|
"grad_norm": 0.10196697892456508, |
|
"learning_rate": 1.5026468279185615e-05, |
|
"loss": 0.1159, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.0665408211420482, |
|
"grad_norm": 0.10326390731228648, |
|
"learning_rate": 1.4938059159964555e-05, |
|
"loss": 0.1161, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.0759792354884379, |
|
"grad_norm": 0.09969628777021497, |
|
"learning_rate": 1.4849136625363297e-05, |
|
"loss": 0.1141, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.0854176498348278, |
|
"grad_norm": 0.0939091288214549, |
|
"learning_rate": 1.4759709920754453e-05, |
|
"loss": 0.1125, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0948560641812175, |
|
"grad_norm": 0.09506323252337912, |
|
"learning_rate": 1.4669788343929736e-05, |
|
"loss": 0.1141, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.1042944785276074, |
|
"grad_norm": 0.10154441851850998, |
|
"learning_rate": 1.4579381244133265e-05, |
|
"loss": 0.1128, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.1137328928739971, |
|
"grad_norm": 0.11822773479215737, |
|
"learning_rate": 1.4488498021089514e-05, |
|
"loss": 0.1137, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.123171307220387, |
|
"grad_norm": 0.1009800661484683, |
|
"learning_rate": 1.4397148124025997e-05, |
|
"loss": 0.1143, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.1326097215667768, |
|
"grad_norm": 0.10126420585679885, |
|
"learning_rate": 1.4305341050690845e-05, |
|
"loss": 0.117, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1420481359131667, |
|
"grad_norm": 0.09588874040008494, |
|
"learning_rate": 1.421308634636529e-05, |
|
"loss": 0.1137, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.1514865502595564, |
|
"grad_norm": 0.10922875430592754, |
|
"learning_rate": 1.412039360287126e-05, |
|
"loss": 0.1145, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.1609249646059463, |
|
"grad_norm": 0.11298890031757797, |
|
"learning_rate": 1.4027272457574082e-05, |
|
"loss": 0.1138, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.170363378952336, |
|
"grad_norm": 0.10857815603466196, |
|
"learning_rate": 1.3933732592380485e-05, |
|
"loss": 0.1135, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.1798017932987257, |
|
"grad_norm": 0.10213279825434321, |
|
"learning_rate": 1.3839783732731966e-05, |
|
"loss": 0.1134, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.1892402076451156, |
|
"grad_norm": 0.10027833977041692, |
|
"learning_rate": 1.3745435646593613e-05, |
|
"loss": 0.1136, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.1986786219915055, |
|
"grad_norm": 0.09590585357482817, |
|
"learning_rate": 1.3650698143438534e-05, |
|
"loss": 0.113, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.2081170363378952, |
|
"grad_norm": 0.10321073236266613, |
|
"learning_rate": 1.3555581073227942e-05, |
|
"loss": 0.1167, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.217555450684285, |
|
"grad_norm": 0.09327710523878686, |
|
"learning_rate": 1.346009432538705e-05, |
|
"loss": 0.1147, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.2269938650306749, |
|
"grad_norm": 0.0933025296287067, |
|
"learning_rate": 1.3364247827776854e-05, |
|
"loss": 0.1145, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.2364322793770646, |
|
"grad_norm": 0.09493771082819326, |
|
"learning_rate": 1.3268051545661937e-05, |
|
"loss": 0.1141, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.2458706937234545, |
|
"grad_norm": 0.10053484854502866, |
|
"learning_rate": 1.3171515480674342e-05, |
|
"loss": 0.1122, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.2553091080698442, |
|
"grad_norm": 0.1108335770631105, |
|
"learning_rate": 1.3074649669773716e-05, |
|
"loss": 0.1173, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.2647475224162341, |
|
"grad_norm": 0.10521299166726314, |
|
"learning_rate": 1.297746418420374e-05, |
|
"loss": 0.1103, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.2741859367626238, |
|
"grad_norm": 0.10478814209881943, |
|
"learning_rate": 1.2879969128445025e-05, |
|
"loss": 0.1122, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.2836243511090137, |
|
"grad_norm": 0.0969588608522638, |
|
"learning_rate": 1.2782174639164528e-05, |
|
"loss": 0.1112, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.2930627654554034, |
|
"grad_norm": 0.10783687256200783, |
|
"learning_rate": 1.2684090884161636e-05, |
|
"loss": 0.1125, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.3025011798017934, |
|
"grad_norm": 0.10076692580892369, |
|
"learning_rate": 1.2585728061311003e-05, |
|
"loss": 0.1107, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.311939594148183, |
|
"grad_norm": 0.09895358395270354, |
|
"learning_rate": 1.248709639750228e-05, |
|
"loss": 0.1122, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.321378008494573, |
|
"grad_norm": 0.10215738990006902, |
|
"learning_rate": 1.2388206147576796e-05, |
|
"loss": 0.1124, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.3308164228409627, |
|
"grad_norm": 0.09611638665301472, |
|
"learning_rate": 1.2289067593261358e-05, |
|
"loss": 0.1151, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.3402548371873526, |
|
"grad_norm": 0.09899073401009041, |
|
"learning_rate": 1.2189691042099265e-05, |
|
"loss": 0.1124, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.3496932515337423, |
|
"grad_norm": 0.1157109248340035, |
|
"learning_rate": 1.209008682637859e-05, |
|
"loss": 0.1154, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.359131665880132, |
|
"grad_norm": 0.09358448441833775, |
|
"learning_rate": 1.1990265302057948e-05, |
|
"loss": 0.1127, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.368570080226522, |
|
"grad_norm": 0.10318474117014907, |
|
"learning_rate": 1.1890236847689762e-05, |
|
"loss": 0.1134, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.3780084945729119, |
|
"grad_norm": 0.10507403584009326, |
|
"learning_rate": 1.1790011863341197e-05, |
|
"loss": 0.1145, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.3874469089193016, |
|
"grad_norm": 0.09210160678217687, |
|
"learning_rate": 1.1689600769512855e-05, |
|
"loss": 0.1128, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.3968853232656913, |
|
"grad_norm": 0.09890271495599744, |
|
"learning_rate": 1.1589014006055337e-05, |
|
"loss": 0.1158, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.4063237376120812, |
|
"grad_norm": 0.09768042621781026, |
|
"learning_rate": 1.1488262031083816e-05, |
|
"loss": 0.1107, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.415762151958471, |
|
"grad_norm": 0.09501221720590393, |
|
"learning_rate": 1.1387355319890685e-05, |
|
"loss": 0.1138, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.4252005663048608, |
|
"grad_norm": 0.09098951194154016, |
|
"learning_rate": 1.1286304363856418e-05, |
|
"loss": 0.112, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.4346389806512505, |
|
"grad_norm": 0.08856225785605727, |
|
"learning_rate": 1.1185119669358792e-05, |
|
"loss": 0.1137, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.4440773949976404, |
|
"grad_norm": 0.09111265321801558, |
|
"learning_rate": 1.1083811756680523e-05, |
|
"loss": 0.1093, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.4535158093440301, |
|
"grad_norm": 0.09329783759350743, |
|
"learning_rate": 1.0982391158915441e-05, |
|
"loss": 0.1138, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.46295422369042, |
|
"grad_norm": 0.09114905230583735, |
|
"learning_rate": 1.0880868420873375e-05, |
|
"loss": 0.1135, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.4723926380368098, |
|
"grad_norm": 0.10049302399783284, |
|
"learning_rate": 1.0779254097983788e-05, |
|
"loss": 0.1104, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.4818310523831997, |
|
"grad_norm": 0.08811790258148439, |
|
"learning_rate": 1.0677558755198327e-05, |
|
"loss": 0.114, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.4912694667295894, |
|
"grad_norm": 0.09402068008649977, |
|
"learning_rate": 1.0575792965892349e-05, |
|
"loss": 0.1112, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.500707881075979, |
|
"grad_norm": 0.09144293350756144, |
|
"learning_rate": 1.0473967310765629e-05, |
|
"loss": 0.1099, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.510146295422369, |
|
"grad_norm": 0.08887643971229772, |
|
"learning_rate": 1.0372092376742247e-05, |
|
"loss": 0.1109, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.519584709768759, |
|
"grad_norm": 0.09042876745354687, |
|
"learning_rate": 1.0270178755869861e-05, |
|
"loss": 0.1123, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.5290231241151486, |
|
"grad_norm": 0.08799872003450031, |
|
"learning_rate": 1.0168237044218452e-05, |
|
"loss": 0.1088, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.08558681617619375, |
|
"learning_rate": 1.0066277840778626e-05, |
|
"loss": 0.1125, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.5478999528079282, |
|
"grad_norm": 0.08917702916102198, |
|
"learning_rate": 9.964311746359631e-06, |
|
"loss": 0.1078, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.5573383671543182, |
|
"grad_norm": 0.09365014825092945, |
|
"learning_rate": 9.862349362487172e-06, |
|
"loss": 0.108, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.5667767815007079, |
|
"grad_norm": 0.08881624424784158, |
|
"learning_rate": 9.760401290301164e-06, |
|
"loss": 0.1073, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.5762151958470976, |
|
"grad_norm": 0.09040629927788134, |
|
"learning_rate": 9.658478129453532e-06, |
|
"loss": 0.1095, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.5856536101934875, |
|
"grad_norm": 0.09668654356646131, |
|
"learning_rate": 9.556590477006123e-06, |
|
"loss": 0.109, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.5950920245398774, |
|
"grad_norm": 0.08945527030759594, |
|
"learning_rate": 9.454748926328962e-06, |
|
"loss": 0.1111, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.6045304388862671, |
|
"grad_norm": 0.09096700102455489, |
|
"learning_rate": 9.352964065998801e-06, |
|
"loss": 0.1091, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.6139688532326568, |
|
"grad_norm": 0.098227701202526, |
|
"learning_rate": 9.251246478698242e-06, |
|
"loss": 0.1124, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.6234072675790467, |
|
"grad_norm": 0.08702267003826049, |
|
"learning_rate": 9.149606740115444e-06, |
|
"loss": 0.1091, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.6328456819254367, |
|
"grad_norm": 0.0912741057496456, |
|
"learning_rate": 9.04805541784454e-06, |
|
"loss": 0.1084, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.6422840962718264, |
|
"grad_norm": 0.09473199957469115, |
|
"learning_rate": 8.946603070286926e-06, |
|
"loss": 0.1071, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.651722510618216, |
|
"grad_norm": 0.09786418318351309, |
|
"learning_rate": 8.845260245553493e-06, |
|
"loss": 0.1106, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.661160924964606, |
|
"grad_norm": 0.09376970400308367, |
|
"learning_rate": 8.744037480367922e-06, |
|
"loss": 0.1095, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.670599339310996, |
|
"grad_norm": 0.09927967174299174, |
|
"learning_rate": 8.642945298971168e-06, |
|
"loss": 0.1086, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.6800377536573856, |
|
"grad_norm": 0.08941325714107755, |
|
"learning_rate": 8.54199421202726e-06, |
|
"loss": 0.1096, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.6894761680037753, |
|
"grad_norm": 0.09076378690689199, |
|
"learning_rate": 8.441194715530472e-06, |
|
"loss": 0.111, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.6989145823501652, |
|
"grad_norm": 0.08350889813597084, |
|
"learning_rate": 8.340557289714055e-06, |
|
"loss": 0.1089, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.708352996696555, |
|
"grad_norm": 0.1102767062542333, |
|
"learning_rate": 8.240092397960601e-06, |
|
"loss": 0.1077, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.7177914110429446, |
|
"grad_norm": 0.0928092514701947, |
|
"learning_rate": 8.139810485714142e-06, |
|
"loss": 0.109, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.7272298253893346, |
|
"grad_norm": 0.08827741386544802, |
|
"learning_rate": 8.03972197939414e-06, |
|
"loss": 0.1103, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.7366682397357245, |
|
"grad_norm": 0.09074532027658296, |
|
"learning_rate": 7.939837285311425e-06, |
|
"loss": 0.106, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.7461066540821142, |
|
"grad_norm": 0.08863060213083432, |
|
"learning_rate": 7.840166788586244e-06, |
|
"loss": 0.1111, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.7555450684285039, |
|
"grad_norm": 0.0873194235737418, |
|
"learning_rate": 7.740720852068524e-06, |
|
"loss": 0.1107, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.7649834827748938, |
|
"grad_norm": 0.09463118593541094, |
|
"learning_rate": 7.641509815260412e-06, |
|
"loss": 0.1067, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.7744218971212837, |
|
"grad_norm": 0.08277999614215281, |
|
"learning_rate": 7.542543993241278e-06, |
|
"loss": 0.1092, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.7838603114676734, |
|
"grad_norm": 0.08350764579820083, |
|
"learning_rate": 7.443833675595254e-06, |
|
"loss": 0.1033, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.7932987258140631, |
|
"grad_norm": 0.0892168369121696, |
|
"learning_rate": 7.3453891253413935e-06, |
|
"loss": 0.1088, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.802737140160453, |
|
"grad_norm": 0.09068391166704463, |
|
"learning_rate": 7.247220577866625e-06, |
|
"loss": 0.1074, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.812175554506843, |
|
"grad_norm": 0.09208367707491026, |
|
"learning_rate": 7.149338239861579e-06, |
|
"loss": 0.1069, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.8216139688532327, |
|
"grad_norm": 0.09334448658561058, |
|
"learning_rate": 7.051752288259366e-06, |
|
"loss": 0.1051, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.8310523831996224, |
|
"grad_norm": 0.0867013966015152, |
|
"learning_rate": 6.954472869177479e-06, |
|
"loss": 0.1071, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.8404907975460123, |
|
"grad_norm": 0.08513824070105314, |
|
"learning_rate": 6.857510096862901e-06, |
|
"loss": 0.108, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.8499292118924022, |
|
"grad_norm": 0.08880515925379688, |
|
"learning_rate": 6.760874052640494e-06, |
|
"loss": 0.1081, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.859367626238792, |
|
"grad_norm": 0.09395118992476542, |
|
"learning_rate": 6.664574783864862e-06, |
|
"loss": 0.1079, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.8688060405851816, |
|
"grad_norm": 0.09253843263366972, |
|
"learning_rate": 6.568622302875682e-06, |
|
"loss": 0.1068, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.8782444549315715, |
|
"grad_norm": 0.09103342170778085, |
|
"learning_rate": 6.473026585956736e-06, |
|
"loss": 0.106, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.8876828692779613, |
|
"grad_norm": 0.08266259287550586, |
|
"learning_rate": 6.377797572298661e-06, |
|
"loss": 0.1076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.897121283624351, |
|
"grad_norm": 0.08360823987901486, |
|
"learning_rate": 6.282945162965548e-06, |
|
"loss": 0.1079, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.9065596979707409, |
|
"grad_norm": 0.09004863363551058, |
|
"learning_rate": 6.188479219865529e-06, |
|
"loss": 0.1064, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.9159981123171308, |
|
"grad_norm": 0.0931263680297109, |
|
"learning_rate": 6.094409564725435e-06, |
|
"loss": 0.1054, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.9254365266635205, |
|
"grad_norm": 0.09051504504706874, |
|
"learning_rate": 6.0007459780695885e-06, |
|
"loss": 0.1082, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.9348749410099102, |
|
"grad_norm": 0.0904194328886728, |
|
"learning_rate": 5.907498198202939e-06, |
|
"loss": 0.1081, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.9443133553563001, |
|
"grad_norm": 0.08711490496456058, |
|
"learning_rate": 5.8146759201985525e-06, |
|
"loss": 0.1069, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.95375176970269, |
|
"grad_norm": 0.08686808156213838, |
|
"learning_rate": 5.722288794889603e-06, |
|
"loss": 0.1064, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.9631901840490797, |
|
"grad_norm": 0.08817123970600604, |
|
"learning_rate": 5.630346427865965e-06, |
|
"loss": 0.1045, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.9726285983954694, |
|
"grad_norm": 0.08339231796976605, |
|
"learning_rate": 5.538858378475508e-06, |
|
"loss": 0.1066, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.9820670127418594, |
|
"grad_norm": 0.08824324960180577, |
|
"learning_rate": 5.447834158830202e-06, |
|
"loss": 0.1037, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.9915054270882493, |
|
"grad_norm": 0.09311929590923752, |
|
"learning_rate": 5.357283232817147e-06, |
|
"loss": 0.1054, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.0009438414346388, |
|
"grad_norm": 0.1334431719993605, |
|
"learning_rate": 5.267215015114574e-06, |
|
"loss": 0.1031, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.0103822557810287, |
|
"grad_norm": 0.10361861757914052, |
|
"learning_rate": 5.177638870213008e-06, |
|
"loss": 0.0868, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.0198206701274186, |
|
"grad_norm": 0.084172782289755, |
|
"learning_rate": 5.088564111441645e-06, |
|
"loss": 0.0834, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.0292590844738085, |
|
"grad_norm": 0.08727410339549913, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.0852, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.038697498820198, |
|
"grad_norm": 0.08994662846331379, |
|
"learning_rate": 4.911955743995042e-06, |
|
"loss": 0.0845, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.048135913166588, |
|
"grad_norm": 0.08715835430152602, |
|
"learning_rate": 4.824440497483802e-06, |
|
"loss": 0.0847, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.057574327512978, |
|
"grad_norm": 0.0943214139106939, |
|
"learning_rate": 4.737463359521618e-06, |
|
"loss": 0.0845, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.067012741859368, |
|
"grad_norm": 0.09049982951899538, |
|
"learning_rate": 4.6510333732160915e-06, |
|
"loss": 0.085, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.0764511562057573, |
|
"grad_norm": 0.08823142440331375, |
|
"learning_rate": 4.565159524786888e-06, |
|
"loss": 0.0867, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.085889570552147, |
|
"grad_norm": 0.08727162151071796, |
|
"learning_rate": 4.479850742631396e-06, |
|
"loss": 0.0834, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.095327984898537, |
|
"grad_norm": 0.08243182067038303, |
|
"learning_rate": 4.395115896396457e-06, |
|
"loss": 0.0849, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.104766399244927, |
|
"grad_norm": 0.08720723034547441, |
|
"learning_rate": 4.310963796056168e-06, |
|
"loss": 0.084, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.1142048135913165, |
|
"grad_norm": 0.08588933137845103, |
|
"learning_rate": 4.227403190995901e-06, |
|
"loss": 0.0875, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.1236432279377064, |
|
"grad_norm": 0.09200761679022347, |
|
"learning_rate": 4.14444276910263e-06, |
|
"loss": 0.0853, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.1330816422840964, |
|
"grad_norm": 0.08831298949051568, |
|
"learning_rate": 4.06209115586162e-06, |
|
"loss": 0.0867, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.1425200566304863, |
|
"grad_norm": 0.0893828115241757, |
|
"learning_rate": 3.980356913459642e-06, |
|
"loss": 0.0865, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.1519584709768758, |
|
"grad_norm": 0.09119946740323005, |
|
"learning_rate": 3.899248539894756e-06, |
|
"loss": 0.0848, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.1613968853232657, |
|
"grad_norm": 0.08882008929472095, |
|
"learning_rate": 3.818774468092754e-06, |
|
"loss": 0.0843, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.1708352996696556, |
|
"grad_norm": 0.08922739717614447, |
|
"learning_rate": 3.738943065030376e-06, |
|
"loss": 0.0811, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.180273714016045, |
|
"grad_norm": 0.08622476744102522, |
|
"learning_rate": 3.659762630865411e-06, |
|
"loss": 0.083, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.189712128362435, |
|
"grad_norm": 0.08399251697806781, |
|
"learning_rate": 3.5812413980736916e-06, |
|
"loss": 0.0827, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.199150542708825, |
|
"grad_norm": 0.09006770579644241, |
|
"learning_rate": 3.5033875305931662e-06, |
|
"loss": 0.0849, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.208588957055215, |
|
"grad_norm": 0.08747795506814363, |
|
"learning_rate": 3.4262091229750973e-06, |
|
"loss": 0.0822, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.2180273714016043, |
|
"grad_norm": 0.08840099962821243, |
|
"learning_rate": 3.3497141995424397e-06, |
|
"loss": 0.0835, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.2274657857479943, |
|
"grad_norm": 0.08986511740506226, |
|
"learning_rate": 3.2739107135555603e-06, |
|
"loss": 0.0841, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.236904200094384, |
|
"grad_norm": 0.08924913381765429, |
|
"learning_rate": 3.1988065463853204e-06, |
|
"loss": 0.0849, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.246342614440774, |
|
"grad_norm": 0.08622955784811655, |
|
"learning_rate": 3.1244095066936396e-06, |
|
"loss": 0.0848, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.2557810287871636, |
|
"grad_norm": 0.08819044440789944, |
|
"learning_rate": 3.050727329621637e-06, |
|
"loss": 0.0835, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.2652194431335535, |
|
"grad_norm": 0.08900963097417651, |
|
"learning_rate": 2.977767675985377e-06, |
|
"loss": 0.0805, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.2746578574799434, |
|
"grad_norm": 0.0889820260869723, |
|
"learning_rate": 2.905538131479376e-06, |
|
"loss": 0.0844, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.2840962718263333, |
|
"grad_norm": 0.08508956855605507, |
|
"learning_rate": 2.8340462058879214e-06, |
|
"loss": 0.082, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.293534686172723, |
|
"grad_norm": 0.0859290434178059, |
|
"learning_rate": 2.76329933230425e-06, |
|
"loss": 0.0819, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.3029731005191127, |
|
"grad_norm": 0.08310233373376713, |
|
"learning_rate": 2.6933048663577297e-06, |
|
"loss": 0.0811, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.3124115148655027, |
|
"grad_norm": 0.08745596999299074, |
|
"learning_rate": 2.6240700854490988e-06, |
|
"loss": 0.0824, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.3218499292118926, |
|
"grad_norm": 0.08451577327209396, |
|
"learning_rate": 2.5556021879938074e-06, |
|
"loss": 0.0828, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.331288343558282, |
|
"grad_norm": 0.09129945645731877, |
|
"learning_rate": 2.4879082926735974e-06, |
|
"loss": 0.0837, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.340726757904672, |
|
"grad_norm": 0.083910957600724, |
|
"learning_rate": 2.4209954376963797e-06, |
|
"loss": 0.0816, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.350165172251062, |
|
"grad_norm": 0.08447673558716574, |
|
"learning_rate": 2.354870580064439e-06, |
|
"loss": 0.0808, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.3596035865974514, |
|
"grad_norm": 0.08671014168934867, |
|
"learning_rate": 2.289540594851122e-06, |
|
"loss": 0.0814, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.3690420009438413, |
|
"grad_norm": 0.08155772855747594, |
|
"learning_rate": 2.225012274486028e-06, |
|
"loss": 0.0791, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.3784804152902312, |
|
"grad_norm": 0.08555794455636312, |
|
"learning_rate": 2.1612923280487883e-06, |
|
"loss": 0.0843, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.387918829636621, |
|
"grad_norm": 0.08566290155470521, |
|
"learning_rate": 2.0983873805715216e-06, |
|
"loss": 0.0837, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.397357243983011, |
|
"grad_norm": 0.08531077534490512, |
|
"learning_rate": 2.0363039723500155e-06, |
|
"loss": 0.0838, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.4067956583294006, |
|
"grad_norm": 0.08375539552341336, |
|
"learning_rate": 1.9750485582637245e-06, |
|
"loss": 0.0822, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.4162340726757905, |
|
"grad_norm": 0.08503293134727295, |
|
"learning_rate": 1.9146275071046626e-06, |
|
"loss": 0.0849, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.4256724870221804, |
|
"grad_norm": 0.08165485153496227, |
|
"learning_rate": 1.8550471009152138e-06, |
|
"loss": 0.0803, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.43511090136857, |
|
"grad_norm": 0.08309504205099255, |
|
"learning_rate": 1.7963135343349914e-06, |
|
"loss": 0.0789, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.44454931571496, |
|
"grad_norm": 0.08294260063335909, |
|
"learning_rate": 1.73843291395678e-06, |
|
"loss": 0.0823, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.4539877300613497, |
|
"grad_norm": 0.08537750016844735, |
|
"learning_rate": 1.6814112576916142e-06, |
|
"loss": 0.0825, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.4634261444077397, |
|
"grad_norm": 0.08433197560779272, |
|
"learning_rate": 1.6252544941430982e-06, |
|
"loss": 0.0813, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.472864558754129, |
|
"grad_norm": 0.08321923421106348, |
|
"learning_rate": 1.5699684619909983e-06, |
|
"loss": 0.0826, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.482302973100519, |
|
"grad_norm": 0.08373732680479463, |
|
"learning_rate": 1.5155589093841939e-06, |
|
"loss": 0.0802, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.491741387446909, |
|
"grad_norm": 0.09112397953039225, |
|
"learning_rate": 1.4620314933430269e-06, |
|
"loss": 0.081, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.501179801793299, |
|
"grad_norm": 0.08246005822696881, |
|
"learning_rate": 1.4093917791711497e-06, |
|
"loss": 0.0808, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.5106182161396884, |
|
"grad_norm": 0.08399111524479627, |
|
"learning_rate": 1.357645239876879e-06, |
|
"loss": 0.08, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.5200566304860783, |
|
"grad_norm": 0.08520663335129833, |
|
"learning_rate": 1.3067972556041753e-06, |
|
"loss": 0.0818, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.5294950448324682, |
|
"grad_norm": 0.08299784168003543, |
|
"learning_rate": 1.2568531130732498e-06, |
|
"loss": 0.0807, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.5389334591788577, |
|
"grad_norm": 0.08594453806774255, |
|
"learning_rate": 1.207818005030904e-06, |
|
"loss": 0.0802, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.5483718735252476, |
|
"grad_norm": 0.08039958287229476, |
|
"learning_rate": 1.1596970297106458e-06, |
|
"loss": 0.0818, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.5578102878716376, |
|
"grad_norm": 0.08085895400758615, |
|
"learning_rate": 1.1124951903025981e-06, |
|
"loss": 0.0806, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.5672487022180275, |
|
"grad_norm": 0.08458215660632902, |
|
"learning_rate": 1.0662173944333288e-06, |
|
"loss": 0.081, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.5766871165644174, |
|
"grad_norm": 0.0826446041832577, |
|
"learning_rate": 1.0208684536555968e-06, |
|
"loss": 0.081, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.586125530910807, |
|
"grad_norm": 0.08442284791430928, |
|
"learning_rate": 9.764530829480822e-07, |
|
"loss": 0.0832, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.595563945257197, |
|
"grad_norm": 0.08336636617446731, |
|
"learning_rate": 9.329759002251726e-07, |
|
"loss": 0.0802, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.6050023596035867, |
|
"grad_norm": 0.0846992268529094, |
|
"learning_rate": 8.904414258568306e-07, |
|
"loss": 0.0799, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.614440773949976, |
|
"grad_norm": 0.08300701320734614, |
|
"learning_rate": 8.488540821986035e-07, |
|
"loss": 0.0827, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.623879188296366, |
|
"grad_norm": 0.08443465909555346, |
|
"learning_rate": 8.082181931318311e-07, |
|
"loss": 0.0792, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.633317602642756, |
|
"grad_norm": 0.08476186997139065, |
|
"learning_rate": 7.685379836140872e-07, |
|
"loss": 0.079, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.642756016989146, |
|
"grad_norm": 0.08472149270611445, |
|
"learning_rate": 7.298175792398976e-07, |
|
"loss": 0.0818, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.652194431335536, |
|
"grad_norm": 0.08032205169166948, |
|
"learning_rate": 6.920610058118105e-07, |
|
"loss": 0.0804, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.6616328456819254, |
|
"grad_norm": 0.08367697987102687, |
|
"learning_rate": 6.552721889218194e-07, |
|
"loss": 0.0816, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.6710712600283153, |
|
"grad_norm": 0.08668064800964889, |
|
"learning_rate": 6.194549535432137e-07, |
|
"loss": 0.08, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.680509674374705, |
|
"grad_norm": 0.0828804628433115, |
|
"learning_rate": 5.846130236329073e-07, |
|
"loss": 0.0823, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.6899480887210947, |
|
"grad_norm": 0.08378018124842795, |
|
"learning_rate": 5.507500217442341e-07, |
|
"loss": 0.0809, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.6993865030674846, |
|
"grad_norm": 0.08220414182971268, |
|
"learning_rate": 5.178694686503205e-07, |
|
"loss": 0.0784, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.7088249174138745, |
|
"grad_norm": 0.08076213129882256, |
|
"learning_rate": 4.85974782978027e-07, |
|
"loss": 0.081, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.718263331760264, |
|
"grad_norm": 0.08189865863600006, |
|
"learning_rate": 4.5506928085250033e-07, |
|
"loss": 0.0778, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.727701746106654, |
|
"grad_norm": 0.08777777733586507, |
|
"learning_rate": 4.251561755524036e-07, |
|
"loss": 0.0832, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.737140160453044, |
|
"grad_norm": 0.08438414185840341, |
|
"learning_rate": 3.9623857717581813e-07, |
|
"loss": 0.0808, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.746578574799434, |
|
"grad_norm": 0.08245920785308986, |
|
"learning_rate": 3.6831949231689203e-07, |
|
"loss": 0.081, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.7560169891458237, |
|
"grad_norm": 0.08356304284398074, |
|
"learning_rate": 3.414018237532335e-07, |
|
"loss": 0.0821, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.765455403492213, |
|
"grad_norm": 0.08633702580648989, |
|
"learning_rate": 3.154883701441136e-07, |
|
"loss": 0.08, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.774893817838603, |
|
"grad_norm": 0.0821243443006684, |
|
"learning_rate": 2.905818257394799e-07, |
|
"loss": 0.0798, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.784332232184993, |
|
"grad_norm": 0.08233971677127812, |
|
"learning_rate": 2.666847800998362e-07, |
|
"loss": 0.0819, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.7937706465313825, |
|
"grad_norm": 0.08176593022884952, |
|
"learning_rate": 2.437997178270035e-07, |
|
"loss": 0.0807, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.8032090608777724, |
|
"grad_norm": 0.08485236840414098, |
|
"learning_rate": 2.219290183057865e-07, |
|
"loss": 0.0806, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.8126474752241624, |
|
"grad_norm": 0.08134044819035506, |
|
"learning_rate": 2.0107495545659829e-07, |
|
"loss": 0.0778, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.8220858895705523, |
|
"grad_norm": 0.08200978384685277, |
|
"learning_rate": 1.8123969749902714e-07, |
|
"loss": 0.0777, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.831524303916942, |
|
"grad_norm": 0.08409107175117113, |
|
"learning_rate": 1.6242530672641143e-07, |
|
"loss": 0.0813, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.8409627182633317, |
|
"grad_norm": 0.08288246905003016, |
|
"learning_rate": 1.4463373929141766e-07, |
|
"loss": 0.0788, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.8504011326097216, |
|
"grad_norm": 0.08212980978941326, |
|
"learning_rate": 1.2786684500265546e-07, |
|
"loss": 0.0819, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.8598395469561115, |
|
"grad_norm": 0.0832626354470357, |
|
"learning_rate": 1.1212636713235581e-07, |
|
"loss": 0.0794, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.869277961302501, |
|
"grad_norm": 0.0836378792472762, |
|
"learning_rate": 9.741394223512057e-08, |
|
"loss": 0.0814, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.878716375648891, |
|
"grad_norm": 0.08017963643030881, |
|
"learning_rate": 8.373109997776185e-08, |
|
"loss": 0.0804, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.888154789995281, |
|
"grad_norm": 0.08105963755024889, |
|
"learning_rate": 7.10792629802659e-08, |
|
"loss": 0.08, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.8975932043416703, |
|
"grad_norm": 0.08196798527897163, |
|
"learning_rate": 5.945974666788479e-08, |
|
"loss": 0.0809, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.9070316186880603, |
|
"grad_norm": 0.08445010118063752, |
|
"learning_rate": 4.887375913436132e-08, |
|
"loss": 0.0817, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.91647003303445, |
|
"grad_norm": 0.0830340739185833, |
|
"learning_rate": 3.932240101633178e-08, |
|
"loss": 0.0823, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.92590844738084, |
|
"grad_norm": 0.08107816362561618, |
|
"learning_rate": 3.0806665378884106e-08, |
|
"loss": 0.0764, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.93534686172723, |
|
"grad_norm": 0.08498347611137054, |
|
"learning_rate": 2.33274376123116e-08, |
|
"loss": 0.0784, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.9447852760736195, |
|
"grad_norm": 0.08214101571060746, |
|
"learning_rate": 1.68854953400599e-08, |
|
"loss": 0.0795, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.9542236904200094, |
|
"grad_norm": 0.0849230883706386, |
|
"learning_rate": 1.1481508337869429e-08, |
|
"loss": 0.0801, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.9636621047663994, |
|
"grad_norm": 0.09022687172778407, |
|
"learning_rate": 7.1160384641455475e-09, |
|
"loss": 0.0828, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.973100519112789, |
|
"grad_norm": 0.08399540827662202, |
|
"learning_rate": 3.7895396015374955e-09, |
|
"loss": 0.0808, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.9825389334591788, |
|
"grad_norm": 0.08027494442622467, |
|
"learning_rate": 1.502357609749483e-09, |
|
"loss": 0.0803, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.9919773478055687, |
|
"grad_norm": 0.08480380742956374, |
|
"learning_rate": 2.5473028957945234e-10, |
|
"loss": 0.0813, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.9985842378480414, |
|
"step": 3177, |
|
"total_flos": 5.875342281120154e+16, |
|
"train_loss": 0.11385704865424108, |
|
"train_runtime": 162519.6807, |
|
"train_samples_per_second": 3.441, |
|
"train_steps_per_second": 0.02 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3177, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.875342281120154e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |