|
{ |
|
"best_metric": 0.9590113159486987, |
|
"best_model_checkpoint": "output_classification_1280/hazard/checkpoint-10538", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 10538, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007591573353577529, |
|
"grad_norm": 27.469635009765625, |
|
"learning_rate": 5.0607287449392715e-08, |
|
"loss": 1.6903, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015183146707155058, |
|
"grad_norm": 29.22759437561035, |
|
"learning_rate": 9.109311740890688e-08, |
|
"loss": 1.6631, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.022774720060732587, |
|
"grad_norm": 22.48965835571289, |
|
"learning_rate": 1.417004048582996e-07, |
|
"loss": 1.7504, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.030366293414310117, |
|
"grad_norm": 30.96166229248047, |
|
"learning_rate": 1.9230769230769234e-07, |
|
"loss": 1.7496, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03795786676788764, |
|
"grad_norm": 28.63855743408203, |
|
"learning_rate": 2.4291497975708504e-07, |
|
"loss": 1.6787, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.045549440121465175, |
|
"grad_norm": 31.33084487915039, |
|
"learning_rate": 2.9352226720647774e-07, |
|
"loss": 1.7747, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0531410134750427, |
|
"grad_norm": 27.18292236328125, |
|
"learning_rate": 3.390688259109312e-07, |
|
"loss": 1.6577, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06073258682862023, |
|
"grad_norm": 30.794124603271484, |
|
"learning_rate": 3.896761133603239e-07, |
|
"loss": 1.7097, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06832416018219777, |
|
"grad_norm": 42.49530792236328, |
|
"learning_rate": 4.402834008097166e-07, |
|
"loss": 1.5986, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07591573353577528, |
|
"grad_norm": 26.470556259155273, |
|
"learning_rate": 4.908906882591093e-07, |
|
"loss": 1.7342, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08350730688935282, |
|
"grad_norm": 40.713924407958984, |
|
"learning_rate": 5.414979757085021e-07, |
|
"loss": 1.517, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09109888024293035, |
|
"grad_norm": 53.97127914428711, |
|
"learning_rate": 5.921052631578947e-07, |
|
"loss": 1.3995, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09869045359650788, |
|
"grad_norm": 45.6757698059082, |
|
"learning_rate": 6.427125506072875e-07, |
|
"loss": 1.2737, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1062820269500854, |
|
"grad_norm": 35.03736114501953, |
|
"learning_rate": 6.933198380566802e-07, |
|
"loss": 1.3719, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11387360030366293, |
|
"grad_norm": 26.410057067871094, |
|
"learning_rate": 7.388663967611337e-07, |
|
"loss": 1.1505, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12146517365724047, |
|
"grad_norm": 30.611797332763672, |
|
"learning_rate": 7.844129554655872e-07, |
|
"loss": 1.3579, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12905674701081798, |
|
"grad_norm": 36.64908981323242, |
|
"learning_rate": 8.350202429149798e-07, |
|
"loss": 1.2164, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13664832036439553, |
|
"grad_norm": 47.913612365722656, |
|
"learning_rate": 8.805668016194332e-07, |
|
"loss": 1.1154, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14423989371797305, |
|
"grad_norm": 71.07138061523438, |
|
"learning_rate": 9.31174089068826e-07, |
|
"loss": 1.2263, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15183146707155057, |
|
"grad_norm": 46.60552978515625, |
|
"learning_rate": 9.817813765182186e-07, |
|
"loss": 1.3512, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15942304042512812, |
|
"grad_norm": 39.1867561340332, |
|
"learning_rate": 1.0323886639676114e-06, |
|
"loss": 1.095, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16701461377870563, |
|
"grad_norm": 48.71131896972656, |
|
"learning_rate": 1.0829959514170041e-06, |
|
"loss": 1.401, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17460618713228315, |
|
"grad_norm": 46.5413703918457, |
|
"learning_rate": 1.133603238866397e-06, |
|
"loss": 1.4863, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1821977604858607, |
|
"grad_norm": 45.00301742553711, |
|
"learning_rate": 1.1842105263157894e-06, |
|
"loss": 1.0144, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18978933383943822, |
|
"grad_norm": 66.17977905273438, |
|
"learning_rate": 1.2348178137651822e-06, |
|
"loss": 1.2741, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19738090719301576, |
|
"grad_norm": 71.00930786132812, |
|
"learning_rate": 1.285425101214575e-06, |
|
"loss": 1.3536, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.20497248054659328, |
|
"grad_norm": 66.67515563964844, |
|
"learning_rate": 1.336032388663968e-06, |
|
"loss": 1.247, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2125640539001708, |
|
"grad_norm": 47.43987274169922, |
|
"learning_rate": 1.3866396761133605e-06, |
|
"loss": 1.2843, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22015562725374835, |
|
"grad_norm": 41.783695220947266, |
|
"learning_rate": 1.4372469635627532e-06, |
|
"loss": 1.401, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22774720060732587, |
|
"grad_norm": 61.59716796875, |
|
"learning_rate": 1.4878542510121458e-06, |
|
"loss": 1.134, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23533877396090339, |
|
"grad_norm": 52.884761810302734, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 1.2276, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24293034731448093, |
|
"grad_norm": 43.76587677001953, |
|
"learning_rate": 1.5890688259109313e-06, |
|
"loss": 1.1747, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25052192066805845, |
|
"grad_norm": 24.984729766845703, |
|
"learning_rate": 1.639676113360324e-06, |
|
"loss": 1.2557, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25811349402163597, |
|
"grad_norm": 32.03645324707031, |
|
"learning_rate": 1.6902834008097168e-06, |
|
"loss": 1.0106, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2657050673752135, |
|
"grad_norm": 37.198177337646484, |
|
"learning_rate": 1.7408906882591095e-06, |
|
"loss": 1.0211, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27329664072879106, |
|
"grad_norm": 46.07294464111328, |
|
"learning_rate": 1.791497975708502e-06, |
|
"loss": 1.019, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2808882140823686, |
|
"grad_norm": 61.57015609741211, |
|
"learning_rate": 1.8370445344129556e-06, |
|
"loss": 1.3087, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2884797874359461, |
|
"grad_norm": 37.366268157958984, |
|
"learning_rate": 1.8876518218623483e-06, |
|
"loss": 0.9793, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2960713607895236, |
|
"grad_norm": 25.108686447143555, |
|
"learning_rate": 1.938259109311741e-06, |
|
"loss": 1.2809, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.30366293414310114, |
|
"grad_norm": 41.831172943115234, |
|
"learning_rate": 1.988866396761134e-06, |
|
"loss": 1.04, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3112545074966787, |
|
"grad_norm": 53.10079574584961, |
|
"learning_rate": 2.0394736842105266e-06, |
|
"loss": 1.1906, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.31884608085025623, |
|
"grad_norm": 38.19053649902344, |
|
"learning_rate": 2.090080971659919e-06, |
|
"loss": 1.1724, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.32643765420383375, |
|
"grad_norm": 43.842498779296875, |
|
"learning_rate": 2.140688259109312e-06, |
|
"loss": 1.1657, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.33402922755741127, |
|
"grad_norm": 54.60807418823242, |
|
"learning_rate": 2.1912955465587044e-06, |
|
"loss": 0.9103, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3416208009109888, |
|
"grad_norm": 48.880218505859375, |
|
"learning_rate": 2.241902834008097e-06, |
|
"loss": 1.1537, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3492123742645663, |
|
"grad_norm": 40.26908493041992, |
|
"learning_rate": 2.2925101214574904e-06, |
|
"loss": 1.0753, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3568039476181439, |
|
"grad_norm": 65.76298522949219, |
|
"learning_rate": 2.3431174089068827e-06, |
|
"loss": 0.9305, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3643955209717214, |
|
"grad_norm": 33.159881591796875, |
|
"learning_rate": 2.3937246963562755e-06, |
|
"loss": 0.9382, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3719870943252989, |
|
"grad_norm": 32.02263259887695, |
|
"learning_rate": 2.4443319838056682e-06, |
|
"loss": 0.8098, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.37957866767887644, |
|
"grad_norm": 50.231842041015625, |
|
"learning_rate": 2.494939271255061e-06, |
|
"loss": 1.0712, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38717024103245395, |
|
"grad_norm": 54.17763137817383, |
|
"learning_rate": 2.5455465587044537e-06, |
|
"loss": 0.9814, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.39476181438603153, |
|
"grad_norm": 25.580745697021484, |
|
"learning_rate": 2.5961538461538465e-06, |
|
"loss": 0.5809, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.40235338773960905, |
|
"grad_norm": 5.449360370635986, |
|
"learning_rate": 2.646761133603239e-06, |
|
"loss": 0.5567, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.40994496109318657, |
|
"grad_norm": 29.534494400024414, |
|
"learning_rate": 2.697368421052632e-06, |
|
"loss": 0.5823, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4175365344467641, |
|
"grad_norm": 13.788243293762207, |
|
"learning_rate": 2.7479757085020247e-06, |
|
"loss": 0.9266, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4251281078003416, |
|
"grad_norm": 32.08829879760742, |
|
"learning_rate": 2.798582995951417e-06, |
|
"loss": 0.432, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4327196811539191, |
|
"grad_norm": 12.410125732421875, |
|
"learning_rate": 2.8491902834008103e-06, |
|
"loss": 0.7482, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4403112545074967, |
|
"grad_norm": 24.522109985351562, |
|
"learning_rate": 2.8997975708502026e-06, |
|
"loss": 0.504, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4479028278610742, |
|
"grad_norm": 17.949840545654297, |
|
"learning_rate": 2.9504048582995953e-06, |
|
"loss": 0.6836, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.45549440121465173, |
|
"grad_norm": 13.618581771850586, |
|
"learning_rate": 3.001012145748988e-06, |
|
"loss": 0.6873, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.46308597456822925, |
|
"grad_norm": 50.62519454956055, |
|
"learning_rate": 3.0516194331983804e-06, |
|
"loss": 0.4781, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.47067754792180677, |
|
"grad_norm": 27.868289947509766, |
|
"learning_rate": 3.1022267206477736e-06, |
|
"loss": 0.7148, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.47826912127538435, |
|
"grad_norm": 30.8429012298584, |
|
"learning_rate": 3.1528340080971664e-06, |
|
"loss": 0.591, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.48586069462896186, |
|
"grad_norm": 51.042518615722656, |
|
"learning_rate": 3.2034412955465587e-06, |
|
"loss": 0.5481, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.4934522679825394, |
|
"grad_norm": 42.53914260864258, |
|
"learning_rate": 3.254048582995952e-06, |
|
"loss": 0.6404, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5010438413361169, |
|
"grad_norm": 28.016672134399414, |
|
"learning_rate": 3.3046558704453446e-06, |
|
"loss": 1.075, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5086354146896944, |
|
"grad_norm": 26.764345169067383, |
|
"learning_rate": 3.355263157894737e-06, |
|
"loss": 0.5689, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5162269880432719, |
|
"grad_norm": 10.721156120300293, |
|
"learning_rate": 3.40587044534413e-06, |
|
"loss": 0.302, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5238185613968495, |
|
"grad_norm": 33.98798751831055, |
|
"learning_rate": 3.4564777327935225e-06, |
|
"loss": 0.3699, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.531410134750427, |
|
"grad_norm": 98.7930908203125, |
|
"learning_rate": 3.5070850202429152e-06, |
|
"loss": 0.5585, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5390017081040046, |
|
"grad_norm": 17.008193969726562, |
|
"learning_rate": 3.557692307692308e-06, |
|
"loss": 0.5513, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5465932814575821, |
|
"grad_norm": 0.9657185077667236, |
|
"learning_rate": 3.6082995951417003e-06, |
|
"loss": 0.3778, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5541848548111596, |
|
"grad_norm": 22.920196533203125, |
|
"learning_rate": 3.6589068825910935e-06, |
|
"loss": 0.2108, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5617764281647372, |
|
"grad_norm": 24.24422264099121, |
|
"learning_rate": 3.7095141700404862e-06, |
|
"loss": 0.774, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5693680015183147, |
|
"grad_norm": 10.006725311279297, |
|
"learning_rate": 3.7601214574898786e-06, |
|
"loss": 0.3806, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5769595748718922, |
|
"grad_norm": 25.408447265625, |
|
"learning_rate": 3.8107287449392717e-06, |
|
"loss": 0.3539, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5845511482254697, |
|
"grad_norm": 1.4603581428527832, |
|
"learning_rate": 3.8613360323886645e-06, |
|
"loss": 0.2608, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5921427215790472, |
|
"grad_norm": 16.798980712890625, |
|
"learning_rate": 3.911943319838057e-06, |
|
"loss": 0.3287, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5997342949326248, |
|
"grad_norm": 11.706854820251465, |
|
"learning_rate": 3.96255060728745e-06, |
|
"loss": 0.5302, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6073258682862023, |
|
"grad_norm": 20.42545509338379, |
|
"learning_rate": 4.013157894736842e-06, |
|
"loss": 0.489, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6149174416397798, |
|
"grad_norm": 44.284629821777344, |
|
"learning_rate": 4.063765182186235e-06, |
|
"loss": 0.4183, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6225090149933574, |
|
"grad_norm": 35.91806411743164, |
|
"learning_rate": 4.114372469635628e-06, |
|
"loss": 0.6323, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6301005883469349, |
|
"grad_norm": 5.10564661026001, |
|
"learning_rate": 4.16497975708502e-06, |
|
"loss": 0.2137, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6376921617005125, |
|
"grad_norm": 14.327881813049316, |
|
"learning_rate": 4.215587044534413e-06, |
|
"loss": 0.1283, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.64528373505409, |
|
"grad_norm": 0.4119018018245697, |
|
"learning_rate": 4.2661943319838065e-06, |
|
"loss": 0.5361, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6528753084076675, |
|
"grad_norm": 0.33248305320739746, |
|
"learning_rate": 4.316801619433199e-06, |
|
"loss": 0.3669, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.660466881761245, |
|
"grad_norm": 0.44110462069511414, |
|
"learning_rate": 4.367408906882591e-06, |
|
"loss": 0.2979, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6680584551148225, |
|
"grad_norm": 0.34030860662460327, |
|
"learning_rate": 4.418016194331984e-06, |
|
"loss": 0.2611, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6756500284684, |
|
"grad_norm": 46.19267272949219, |
|
"learning_rate": 4.468623481781377e-06, |
|
"loss": 0.2948, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6832416018219776, |
|
"grad_norm": 33.486717224121094, |
|
"learning_rate": 4.51923076923077e-06, |
|
"loss": 0.2593, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6908331751755551, |
|
"grad_norm": 43.04954528808594, |
|
"learning_rate": 4.569838056680162e-06, |
|
"loss": 0.2791, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6984247485291326, |
|
"grad_norm": 57.075809478759766, |
|
"learning_rate": 4.6204453441295545e-06, |
|
"loss": 0.2198, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7060163218827102, |
|
"grad_norm": 52.269168853759766, |
|
"learning_rate": 4.671052631578948e-06, |
|
"loss": 0.4377, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7136078952362878, |
|
"grad_norm": 0.06886545568704605, |
|
"learning_rate": 4.72165991902834e-06, |
|
"loss": 0.1961, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7211994685898653, |
|
"grad_norm": 41.10899353027344, |
|
"learning_rate": 4.772267206477733e-06, |
|
"loss": 0.4847, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7287910419434428, |
|
"grad_norm": 2.2750415802001953, |
|
"learning_rate": 4.822874493927126e-06, |
|
"loss": 0.2632, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7363826152970203, |
|
"grad_norm": 0.374896764755249, |
|
"learning_rate": 4.873481781376519e-06, |
|
"loss": 0.2243, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7439741886505978, |
|
"grad_norm": 29.88395118713379, |
|
"learning_rate": 4.924089068825911e-06, |
|
"loss": 0.256, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7515657620041754, |
|
"grad_norm": 48.7998161315918, |
|
"learning_rate": 4.974696356275304e-06, |
|
"loss": 0.2969, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7591573353577529, |
|
"grad_norm": 33.62395095825195, |
|
"learning_rate": 5.025303643724697e-06, |
|
"loss": 0.4137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7667489087113304, |
|
"grad_norm": 37.788795471191406, |
|
"learning_rate": 5.07591093117409e-06, |
|
"loss": 0.2332, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7743404820649079, |
|
"grad_norm": 0.14387387037277222, |
|
"learning_rate": 5.126518218623482e-06, |
|
"loss": 0.2919, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7819320554184854, |
|
"grad_norm": 5.84027099609375, |
|
"learning_rate": 5.177125506072875e-06, |
|
"loss": 0.2009, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7895236287720631, |
|
"grad_norm": 0.04207382723689079, |
|
"learning_rate": 5.227732793522268e-06, |
|
"loss": 0.2878, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7971152021256406, |
|
"grad_norm": 45.870460510253906, |
|
"learning_rate": 5.27834008097166e-06, |
|
"loss": 0.2449, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8047067754792181, |
|
"grad_norm": 51.27897262573242, |
|
"learning_rate": 5.328947368421054e-06, |
|
"loss": 0.2529, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8122983488327956, |
|
"grad_norm": 0.5692594051361084, |
|
"learning_rate": 5.379554655870446e-06, |
|
"loss": 0.6134, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8198899221863731, |
|
"grad_norm": 0.09678292274475098, |
|
"learning_rate": 5.430161943319839e-06, |
|
"loss": 0.2444, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8274814955399507, |
|
"grad_norm": 1.1001735925674438, |
|
"learning_rate": 5.480769230769232e-06, |
|
"loss": 0.2606, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8350730688935282, |
|
"grad_norm": 0.183668851852417, |
|
"learning_rate": 5.531376518218624e-06, |
|
"loss": 0.4214, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8426646422471057, |
|
"grad_norm": 1.1151483058929443, |
|
"learning_rate": 5.5819838056680164e-06, |
|
"loss": 0.1742, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8502562156006832, |
|
"grad_norm": 4.986824989318848, |
|
"learning_rate": 5.632591093117409e-06, |
|
"loss": 0.2275, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8578477889542607, |
|
"grad_norm": 40.5273323059082, |
|
"learning_rate": 5.683198380566802e-06, |
|
"loss": 0.2895, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8654393623078382, |
|
"grad_norm": 27.60036849975586, |
|
"learning_rate": 5.733805668016194e-06, |
|
"loss": 0.1973, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8730309356614159, |
|
"grad_norm": 0.3474140763282776, |
|
"learning_rate": 5.784412955465587e-06, |
|
"loss": 0.2508, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8806225090149934, |
|
"grad_norm": 41.10483169555664, |
|
"learning_rate": 5.835020242914981e-06, |
|
"loss": 0.1403, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8882140823685709, |
|
"grad_norm": 38.43809509277344, |
|
"learning_rate": 5.885627530364373e-06, |
|
"loss": 0.1639, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8958056557221484, |
|
"grad_norm": 0.1039985790848732, |
|
"learning_rate": 5.936234817813765e-06, |
|
"loss": 0.3821, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.903397229075726, |
|
"grad_norm": 7.511643409729004, |
|
"learning_rate": 5.9868421052631585e-06, |
|
"loss": 0.3217, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9109888024293035, |
|
"grad_norm": 1.0779646635055542, |
|
"learning_rate": 6.037449392712551e-06, |
|
"loss": 0.306, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.918580375782881, |
|
"grad_norm": 36.478607177734375, |
|
"learning_rate": 6.088056680161943e-06, |
|
"loss": 0.2311, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9261719491364585, |
|
"grad_norm": 5.484299659729004, |
|
"learning_rate": 6.138663967611337e-06, |
|
"loss": 0.1957, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.933763522490036, |
|
"grad_norm": 36.05448532104492, |
|
"learning_rate": 6.1892712550607295e-06, |
|
"loss": 0.1806, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9413550958436135, |
|
"grad_norm": 0.11734521389007568, |
|
"learning_rate": 6.239878542510122e-06, |
|
"loss": 0.1755, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9489466691971912, |
|
"grad_norm": 29.589279174804688, |
|
"learning_rate": 6.290485829959515e-06, |
|
"loss": 0.1282, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9565382425507687, |
|
"grad_norm": 0.0918528214097023, |
|
"learning_rate": 6.341093117408907e-06, |
|
"loss": 0.1178, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9641298159043462, |
|
"grad_norm": 23.708993911743164, |
|
"learning_rate": 6.3917004048583e-06, |
|
"loss": 0.4097, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9717213892579237, |
|
"grad_norm": 34.241607666015625, |
|
"learning_rate": 6.442307692307693e-06, |
|
"loss": 0.25, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9793129626115012, |
|
"grad_norm": 3.067420482635498, |
|
"learning_rate": 6.492914979757086e-06, |
|
"loss": 0.201, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9869045359650788, |
|
"grad_norm": 20.88066291809082, |
|
"learning_rate": 6.543522267206478e-06, |
|
"loss": 0.2371, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9944961093186563, |
|
"grad_norm": 463.48541259765625, |
|
"learning_rate": 6.5941295546558715e-06, |
|
"loss": 0.436, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9998102106661606, |
|
"eval_f1": 0.8680898011188399, |
|
"eval_loss": 0.3059525787830353, |
|
"eval_precision": 0.8342809981821465, |
|
"eval_recall": 0.8694992412746586, |
|
"eval_runtime": 75.6444, |
|
"eval_samples_per_second": 17.424, |
|
"eval_steps_per_second": 17.424, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 1.0020876826722338, |
|
"grad_norm": 49.61616897583008, |
|
"learning_rate": 6.644736842105264e-06, |
|
"loss": 0.0671, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.0096792560258114, |
|
"grad_norm": 0.10375616699457169, |
|
"learning_rate": 6.695344129554656e-06, |
|
"loss": 0.3414, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.0172708293793888, |
|
"grad_norm": 1.4322081804275513, |
|
"learning_rate": 6.745951417004049e-06, |
|
"loss": 0.4417, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.0248624027329665, |
|
"grad_norm": 33.353389739990234, |
|
"learning_rate": 6.796558704453442e-06, |
|
"loss": 0.2982, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0324539760865439, |
|
"grad_norm": 0.03437357768416405, |
|
"learning_rate": 6.847165991902834e-06, |
|
"loss": 0.1854, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.0400455494401215, |
|
"grad_norm": 25.882795333862305, |
|
"learning_rate": 6.897773279352226e-06, |
|
"loss": 0.2332, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.047637122793699, |
|
"grad_norm": 0.06532780081033707, |
|
"learning_rate": 6.94838056680162e-06, |
|
"loss": 0.0215, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0552286961472765, |
|
"grad_norm": 32.39247131347656, |
|
"learning_rate": 6.998987854251013e-06, |
|
"loss": 0.072, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.062820269500854, |
|
"grad_norm": 0.027906494215130806, |
|
"learning_rate": 7.049595141700405e-06, |
|
"loss": 0.2047, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0704118428544316, |
|
"grad_norm": 0.6285625100135803, |
|
"learning_rate": 7.100202429149798e-06, |
|
"loss": 0.1842, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0780034162080092, |
|
"grad_norm": 45.90504837036133, |
|
"learning_rate": 7.1508097165991906e-06, |
|
"loss": 0.3873, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0855949895615866, |
|
"grad_norm": 0.1192048192024231, |
|
"learning_rate": 7.201417004048583e-06, |
|
"loss": 0.0922, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.0931865629151643, |
|
"grad_norm": 0.01541421003639698, |
|
"learning_rate": 7.252024291497977e-06, |
|
"loss": 0.2405, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.1007781362687417, |
|
"grad_norm": 1.1560391187667847, |
|
"learning_rate": 7.302631578947369e-06, |
|
"loss": 0.127, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.1083697096223193, |
|
"grad_norm": 0.03278697654604912, |
|
"learning_rate": 7.353238866396762e-06, |
|
"loss": 0.1782, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.1159612829758967, |
|
"grad_norm": 0.019922947511076927, |
|
"learning_rate": 7.403846153846155e-06, |
|
"loss": 0.0464, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.1235528563294743, |
|
"grad_norm": 0.06464574486017227, |
|
"learning_rate": 7.454453441295547e-06, |
|
"loss": 0.1965, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.1311444296830517, |
|
"grad_norm": 3.4782345294952393, |
|
"learning_rate": 7.505060728744939e-06, |
|
"loss": 0.2716, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.1387360030366294, |
|
"grad_norm": 11.709443092346191, |
|
"learning_rate": 7.555668016194333e-06, |
|
"loss": 0.0861, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.146327576390207, |
|
"grad_norm": 0.06341992318630219, |
|
"learning_rate": 7.606275303643725e-06, |
|
"loss": 0.2737, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.1539191497437844, |
|
"grad_norm": 0.41259318590164185, |
|
"learning_rate": 7.656882591093118e-06, |
|
"loss": 0.0553, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1615107230973618, |
|
"grad_norm": 0.2754802107810974, |
|
"learning_rate": 7.70748987854251e-06, |
|
"loss": 0.1928, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1691022964509394, |
|
"grad_norm": 35.12890625, |
|
"learning_rate": 7.758097165991903e-06, |
|
"loss": 0.2975, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.176693869804517, |
|
"grad_norm": 0.01568063162267208, |
|
"learning_rate": 7.808704453441295e-06, |
|
"loss": 0.0757, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1842854431580945, |
|
"grad_norm": 63.980228424072266, |
|
"learning_rate": 7.859311740890689e-06, |
|
"loss": 0.2807, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.191877016511672, |
|
"grad_norm": 0.28402331471443176, |
|
"learning_rate": 7.909919028340081e-06, |
|
"loss": 0.0808, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.1994685898652495, |
|
"grad_norm": 0.028258422389626503, |
|
"learning_rate": 7.960526315789474e-06, |
|
"loss": 0.1397, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.2070601632188271, |
|
"grad_norm": 3.0772080421447754, |
|
"learning_rate": 8.011133603238868e-06, |
|
"loss": 0.0761, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.2146517365724046, |
|
"grad_norm": 0.37224826216697693, |
|
"learning_rate": 8.06174089068826e-06, |
|
"loss": 0.2265, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.2222433099259822, |
|
"grad_norm": 0.02686912938952446, |
|
"learning_rate": 8.112348178137652e-06, |
|
"loss": 0.0467, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.2298348832795596, |
|
"grad_norm": 0.040963444858789444, |
|
"learning_rate": 8.162955465587045e-06, |
|
"loss": 0.3815, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.2374264566331372, |
|
"grad_norm": 15.119370460510254, |
|
"learning_rate": 8.213562753036439e-06, |
|
"loss": 0.1005, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.2450180299867148, |
|
"grad_norm": 35.875064849853516, |
|
"learning_rate": 8.264170040485831e-06, |
|
"loss": 0.3051, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.2526096033402923, |
|
"grad_norm": 70.46387481689453, |
|
"learning_rate": 8.314777327935223e-06, |
|
"loss": 0.175, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.2602011766938699, |
|
"grad_norm": 0.02256329357624054, |
|
"learning_rate": 8.365384615384616e-06, |
|
"loss": 0.2415, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2677927500474473, |
|
"grad_norm": 35.11568069458008, |
|
"learning_rate": 8.415991902834008e-06, |
|
"loss": 0.2629, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.275384323401025, |
|
"grad_norm": 71.48613739013672, |
|
"learning_rate": 8.4665991902834e-06, |
|
"loss": 0.4151, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.2829758967546023, |
|
"grad_norm": 78.90449523925781, |
|
"learning_rate": 8.517206477732795e-06, |
|
"loss": 0.1292, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.29056747010818, |
|
"grad_norm": 31.373775482177734, |
|
"learning_rate": 8.567813765182187e-06, |
|
"loss": 0.26, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2981590434617574, |
|
"grad_norm": 0.049251481890678406, |
|
"learning_rate": 8.61842105263158e-06, |
|
"loss": 0.4099, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.305750616815335, |
|
"grad_norm": 26.275672912597656, |
|
"learning_rate": 8.669028340080973e-06, |
|
"loss": 0.1674, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.3133421901689126, |
|
"grad_norm": 56.808570861816406, |
|
"learning_rate": 8.719635627530366e-06, |
|
"loss": 0.2071, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.32093376352249, |
|
"grad_norm": 12.969684600830078, |
|
"learning_rate": 8.770242914979758e-06, |
|
"loss": 0.3515, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.3285253368760674, |
|
"grad_norm": 0.2686771750450134, |
|
"learning_rate": 8.82085020242915e-06, |
|
"loss": 0.128, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.336116910229645, |
|
"grad_norm": 0.012039333581924438, |
|
"learning_rate": 8.871457489878543e-06, |
|
"loss": 0.1058, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.3437084835832227, |
|
"grad_norm": 20.223878860473633, |
|
"learning_rate": 8.922064777327935e-06, |
|
"loss": 0.196, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.3513000569368, |
|
"grad_norm": 0.014049122110009193, |
|
"learning_rate": 8.972672064777329e-06, |
|
"loss": 0.3733, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.3588916302903777, |
|
"grad_norm": 42.03798294067383, |
|
"learning_rate": 9.023279352226721e-06, |
|
"loss": 0.1683, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.3664832036439551, |
|
"grad_norm": 0.044906727969646454, |
|
"learning_rate": 9.073886639676114e-06, |
|
"loss": 0.2116, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3740747769975328, |
|
"grad_norm": 33.70309829711914, |
|
"learning_rate": 9.124493927125508e-06, |
|
"loss": 0.3049, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3816663503511104, |
|
"grad_norm": 8.82701301574707, |
|
"learning_rate": 9.1751012145749e-06, |
|
"loss": 0.0822, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.3892579237046878, |
|
"grad_norm": 2.3878729343414307, |
|
"learning_rate": 9.225708502024292e-06, |
|
"loss": 0.0592, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3968494970582652, |
|
"grad_norm": 0.0016124140238389373, |
|
"learning_rate": 9.276315789473686e-06, |
|
"loss": 0.1208, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.4044410704118429, |
|
"grad_norm": 0.13426095247268677, |
|
"learning_rate": 9.326923076923079e-06, |
|
"loss": 0.2488, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.4120326437654205, |
|
"grad_norm": 95.8023681640625, |
|
"learning_rate": 9.377530364372471e-06, |
|
"loss": 0.3505, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.4196242171189979, |
|
"grad_norm": 0.10023036599159241, |
|
"learning_rate": 9.428137651821863e-06, |
|
"loss": 0.2593, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.4272157904725755, |
|
"grad_norm": 0.0036512434016913176, |
|
"learning_rate": 9.478744939271256e-06, |
|
"loss": 0.1653, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.434807363826153, |
|
"grad_norm": 0.11651404201984406, |
|
"learning_rate": 9.529352226720648e-06, |
|
"loss": 0.1281, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.4423989371797306, |
|
"grad_norm": 99.45907592773438, |
|
"learning_rate": 9.57995951417004e-06, |
|
"loss": 0.2001, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.449990510533308, |
|
"grad_norm": 0.42387983202934265, |
|
"learning_rate": 9.630566801619434e-06, |
|
"loss": 0.2895, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.4575820838868856, |
|
"grad_norm": 155.79856872558594, |
|
"learning_rate": 9.681174089068827e-06, |
|
"loss": 0.2749, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.465173657240463, |
|
"grad_norm": 0.036998867988586426, |
|
"learning_rate": 9.731781376518219e-06, |
|
"loss": 0.3386, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.4727652305940406, |
|
"grad_norm": 20.147798538208008, |
|
"learning_rate": 9.782388663967613e-06, |
|
"loss": 0.0259, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4803568039476183, |
|
"grad_norm": 0.6697649955749512, |
|
"learning_rate": 9.832995951417005e-06, |
|
"loss": 0.0671, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4879483773011957, |
|
"grad_norm": 34.21855545043945, |
|
"learning_rate": 9.883603238866398e-06, |
|
"loss": 0.4116, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.495539950654773, |
|
"grad_norm": 55.607818603515625, |
|
"learning_rate": 9.93421052631579e-06, |
|
"loss": 0.2809, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.5031315240083507, |
|
"grad_norm": 7.255304336547852, |
|
"learning_rate": 9.984817813765182e-06, |
|
"loss": 0.2086, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.5107230973619283, |
|
"grad_norm": 0.03336051478981972, |
|
"learning_rate": 9.999996175090899e-06, |
|
"loss": 0.0513, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.5183146707155057, |
|
"grad_norm": 0.016688983887434006, |
|
"learning_rate": 9.999977440856317e-06, |
|
"loss": 0.1644, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5259062440690834, |
|
"grad_norm": 25.093719482421875, |
|
"learning_rate": 9.999943094820354e-06, |
|
"loss": 0.2127, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.5334978174226608, |
|
"grad_norm": 28.240819931030273, |
|
"learning_rate": 9.999893137090254e-06, |
|
"loss": 0.2039, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.5410893907762384, |
|
"grad_norm": 0.2675958275794983, |
|
"learning_rate": 9.999827567822e-06, |
|
"loss": 0.1192, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.548680964129816, |
|
"grad_norm": 0.0035021628718823195, |
|
"learning_rate": 9.999746387220327e-06, |
|
"loss": 0.4307, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.5562725374833934, |
|
"grad_norm": 45.449134826660156, |
|
"learning_rate": 9.999649595538705e-06, |
|
"loss": 0.1564, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.5638641108369709, |
|
"grad_norm": 28.17760467529297, |
|
"learning_rate": 9.999537193079362e-06, |
|
"loss": 0.3947, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.5714556841905485, |
|
"grad_norm": 0.08233608305454254, |
|
"learning_rate": 9.999409180193255e-06, |
|
"loss": 0.2997, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.5790472575441261, |
|
"grad_norm": 0.010642267763614655, |
|
"learning_rate": 9.99926555728009e-06, |
|
"loss": 0.0658, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.5866388308977035, |
|
"grad_norm": 33.69260787963867, |
|
"learning_rate": 9.999106324788313e-06, |
|
"loss": 0.2578, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.594230404251281, |
|
"grad_norm": 35.530982971191406, |
|
"learning_rate": 9.998931483215103e-06, |
|
"loss": 0.0085, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6018219776048586, |
|
"grad_norm": 0.02198372408747673, |
|
"learning_rate": 9.998741033106385e-06, |
|
"loss": 0.1038, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.6094135509584362, |
|
"grad_norm": 3.9551048278808594, |
|
"learning_rate": 9.998534975056814e-06, |
|
"loss": 0.1167, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.6170051243120138, |
|
"grad_norm": 1.1452088356018066, |
|
"learning_rate": 9.998313309709782e-06, |
|
"loss": 0.1636, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.6245966976655912, |
|
"grad_norm": 45.56749725341797, |
|
"learning_rate": 9.998076037757408e-06, |
|
"loss": 0.2347, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.6321882710191686, |
|
"grad_norm": 0.002319494029507041, |
|
"learning_rate": 9.997823159940545e-06, |
|
"loss": 0.0795, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.6397798443727463, |
|
"grad_norm": 0.028734903782606125, |
|
"learning_rate": 9.997554677048776e-06, |
|
"loss": 0.2305, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.647371417726324, |
|
"grad_norm": 0.004517258144915104, |
|
"learning_rate": 9.997270589920399e-06, |
|
"loss": 0.0011, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.6549629910799013, |
|
"grad_norm": 1.5917277336120605, |
|
"learning_rate": 9.996970899442444e-06, |
|
"loss": 0.1614, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.6625545644334787, |
|
"grad_norm": 0.05392596498131752, |
|
"learning_rate": 9.996655606550657e-06, |
|
"loss": 0.2937, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.6701461377870563, |
|
"grad_norm": 39.229007720947266, |
|
"learning_rate": 9.996324712229499e-06, |
|
"loss": 0.1227, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.677737711140634, |
|
"grad_norm": 19.827287673950195, |
|
"learning_rate": 9.995978217512146e-06, |
|
"loss": 0.1703, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.6853292844942114, |
|
"grad_norm": 0.007869013585150242, |
|
"learning_rate": 9.995616123480485e-06, |
|
"loss": 0.298, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.692920857847789, |
|
"grad_norm": 17.308448791503906, |
|
"learning_rate": 9.99523843126511e-06, |
|
"loss": 0.2699, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.7005124312013664, |
|
"grad_norm": 0.07290565222501755, |
|
"learning_rate": 9.994845142045315e-06, |
|
"loss": 0.0798, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.708104004554944, |
|
"grad_norm": 0.0642884150147438, |
|
"learning_rate": 9.994436257049098e-06, |
|
"loss": 0.3115, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.7156955779085217, |
|
"grad_norm": 3.773754835128784, |
|
"learning_rate": 9.994011777553152e-06, |
|
"loss": 0.1151, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.723287151262099, |
|
"grad_norm": 12.578306198120117, |
|
"learning_rate": 9.99357170488286e-06, |
|
"loss": 0.2351, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.7308787246156765, |
|
"grad_norm": 0.12735772132873535, |
|
"learning_rate": 9.993116040412289e-06, |
|
"loss": 0.2368, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.7384702979692541, |
|
"grad_norm": 37.49304962158203, |
|
"learning_rate": 9.9926447855642e-06, |
|
"loss": 0.1451, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.7460618713228317, |
|
"grad_norm": 7.337117671966553, |
|
"learning_rate": 9.992157941810027e-06, |
|
"loss": 0.2029, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.7536534446764092, |
|
"grad_norm": 93.44843292236328, |
|
"learning_rate": 9.991655510669875e-06, |
|
"loss": 0.2177, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.7612450180299866, |
|
"grad_norm": 6.563670635223389, |
|
"learning_rate": 9.991137493712524e-06, |
|
"loss": 0.0768, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.7688365913835642, |
|
"grad_norm": 0.021621128544211388, |
|
"learning_rate": 9.990603892555417e-06, |
|
"loss": 0.1178, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.7764281647371418, |
|
"grad_norm": 0.022252781316637993, |
|
"learning_rate": 9.990054708864655e-06, |
|
"loss": 0.1944, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.7840197380907195, |
|
"grad_norm": 21.766817092895508, |
|
"learning_rate": 9.989489944355e-06, |
|
"loss": 0.355, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7916113114442969, |
|
"grad_norm": 0.05736351013183594, |
|
"learning_rate": 9.988909600789851e-06, |
|
"loss": 0.1318, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.7992028847978743, |
|
"grad_norm": 44.977779388427734, |
|
"learning_rate": 9.988313679981263e-06, |
|
"loss": 0.0222, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.806794458151452, |
|
"grad_norm": 0.016255084425210953, |
|
"learning_rate": 9.987702183789922e-06, |
|
"loss": 0.1285, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.8143860315050295, |
|
"grad_norm": 0.5945267081260681, |
|
"learning_rate": 9.987075114125148e-06, |
|
"loss": 0.3838, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.821977604858607, |
|
"grad_norm": 0.004704204387962818, |
|
"learning_rate": 9.986432472944887e-06, |
|
"loss": 0.1587, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.8295691782121843, |
|
"grad_norm": 0.07433657348155975, |
|
"learning_rate": 9.985774262255708e-06, |
|
"loss": 0.1604, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.837160751565762, |
|
"grad_norm": 0.08134903013706207, |
|
"learning_rate": 9.985100484112786e-06, |
|
"loss": 0.2395, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.8447523249193396, |
|
"grad_norm": 0.5896629095077515, |
|
"learning_rate": 9.984411140619914e-06, |
|
"loss": 0.0397, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.852343898272917, |
|
"grad_norm": 0.0015955844428390265, |
|
"learning_rate": 9.983706233929477e-06, |
|
"loss": 0.2479, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.8599354716264946, |
|
"grad_norm": 12.32898998260498, |
|
"learning_rate": 9.982985766242458e-06, |
|
"loss": 0.071, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.867527044980072, |
|
"grad_norm": 0.17913532257080078, |
|
"learning_rate": 9.98224973980843e-06, |
|
"loss": 0.0426, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.8751186183336497, |
|
"grad_norm": 0.06611054390668869, |
|
"learning_rate": 9.981498156925539e-06, |
|
"loss": 0.3534, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.8827101916872273, |
|
"grad_norm": 2.170029640197754, |
|
"learning_rate": 9.98073101994051e-06, |
|
"loss": 0.1845, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.8903017650408047, |
|
"grad_norm": 3.257478952407837, |
|
"learning_rate": 9.979948331248633e-06, |
|
"loss": 0.0038, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8978933383943821, |
|
"grad_norm": 138.6713409423828, |
|
"learning_rate": 9.979150093293753e-06, |
|
"loss": 0.1855, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9054849117479598, |
|
"grad_norm": 0.7939999103546143, |
|
"learning_rate": 9.978336308568266e-06, |
|
"loss": 0.2101, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.9130764851015374, |
|
"grad_norm": 14.836468696594238, |
|
"learning_rate": 9.977506979613118e-06, |
|
"loss": 0.2692, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.9206680584551148, |
|
"grad_norm": 0.4420275390148163, |
|
"learning_rate": 9.97666210901778e-06, |
|
"loss": 0.0356, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.9282596318086922, |
|
"grad_norm": 4.923569679260254, |
|
"learning_rate": 9.975801699420256e-06, |
|
"loss": 0.1263, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.9358512051622698, |
|
"grad_norm": 0.01419526245445013, |
|
"learning_rate": 9.974925753507066e-06, |
|
"loss": 0.0735, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.9434427785158475, |
|
"grad_norm": 72.68999481201172, |
|
"learning_rate": 9.974034274013242e-06, |
|
"loss": 0.0418, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.951034351869425, |
|
"grad_norm": 0.0027209515683352947, |
|
"learning_rate": 9.973127263722317e-06, |
|
"loss": 0.0042, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.9586259252230025, |
|
"grad_norm": 0.015417971648275852, |
|
"learning_rate": 9.972204725466316e-06, |
|
"loss": 0.2174, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.96621749857658, |
|
"grad_norm": 0.013561515137553215, |
|
"learning_rate": 9.971266662125749e-06, |
|
"loss": 0.0808, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.9738090719301575, |
|
"grad_norm": 72.93014526367188, |
|
"learning_rate": 9.9703130766296e-06, |
|
"loss": 0.2353, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.9814006452837352, |
|
"grad_norm": 0.028727278113365173, |
|
"learning_rate": 9.96934397195532e-06, |
|
"loss": 0.0344, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.9889922186373126, |
|
"grad_norm": 6.5093770027160645, |
|
"learning_rate": 9.96835935112882e-06, |
|
"loss": 0.3215, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.99658379199089, |
|
"grad_norm": 45.58213806152344, |
|
"learning_rate": 9.96735921722445e-06, |
|
"loss": 0.4849, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9996204213323212, |
|
"eval_f1": 0.9241639816476168, |
|
"eval_loss": 0.15625236928462982, |
|
"eval_precision": 0.9250778152019562, |
|
"eval_recall": 0.9241274658573596, |
|
"eval_runtime": 75.5915, |
|
"eval_samples_per_second": 17.436, |
|
"eval_steps_per_second": 17.436, |
|
"step": 2634 |
|
}, |
|
{ |
|
"epoch": 2.0041753653444676, |
|
"grad_norm": 0.07899657636880875, |
|
"learning_rate": 9.966343573365005e-06, |
|
"loss": 0.0937, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.0117669386980452, |
|
"grad_norm": 1.1364494562149048, |
|
"learning_rate": 9.965312422721705e-06, |
|
"loss": 0.0372, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.019358512051623, |
|
"grad_norm": 0.009463181719183922, |
|
"learning_rate": 9.964265768514189e-06, |
|
"loss": 0.1315, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.0269500854052, |
|
"grad_norm": 152.41160583496094, |
|
"learning_rate": 9.963203614010502e-06, |
|
"loss": 0.1601, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.0345416587587777, |
|
"grad_norm": 12.7033109664917, |
|
"learning_rate": 9.962125962527088e-06, |
|
"loss": 0.1492, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.0421332321123553, |
|
"grad_norm": 0.1103023886680603, |
|
"learning_rate": 9.961032817428779e-06, |
|
"loss": 0.044, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.049724805465933, |
|
"grad_norm": 0.04437507316470146, |
|
"learning_rate": 9.959924182128784e-06, |
|
"loss": 0.2004, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.05731637881951, |
|
"grad_norm": 0.016279350966215134, |
|
"learning_rate": 9.958800060088675e-06, |
|
"loss": 0.0789, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.0649079521730878, |
|
"grad_norm": 0.06195428967475891, |
|
"learning_rate": 9.957660454818385e-06, |
|
"loss": 0.1212, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.0724995255266654, |
|
"grad_norm": 0.07117705792188644, |
|
"learning_rate": 9.956505369876187e-06, |
|
"loss": 0.1124, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.080091098880243, |
|
"grad_norm": 0.0017620900180190802, |
|
"learning_rate": 9.955334808868686e-06, |
|
"loss": 0.2135, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.0876826722338206, |
|
"grad_norm": 0.0784306600689888, |
|
"learning_rate": 9.954148775450816e-06, |
|
"loss": 0.0047, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.095274245587398, |
|
"grad_norm": 0.014996266923844814, |
|
"learning_rate": 9.952947273325815e-06, |
|
"loss": 0.0063, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.1028658189409755, |
|
"grad_norm": 3.2599010467529297, |
|
"learning_rate": 9.951730306245222e-06, |
|
"loss": 0.1602, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.110457392294553, |
|
"grad_norm": 0.016863863915205002, |
|
"learning_rate": 9.950497878008865e-06, |
|
"loss": 0.0317, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.1180489656481307, |
|
"grad_norm": 15.340392112731934, |
|
"learning_rate": 9.949249992464847e-06, |
|
"loss": 0.154, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.125640539001708, |
|
"grad_norm": 4.341642379760742, |
|
"learning_rate": 9.947986653509531e-06, |
|
"loss": 0.0257, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.1332321123552855, |
|
"grad_norm": 1.8507261276245117, |
|
"learning_rate": 9.946707865087538e-06, |
|
"loss": 0.1434, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.140823685708863, |
|
"grad_norm": 0.16088451445102692, |
|
"learning_rate": 9.94541363119172e-06, |
|
"loss": 0.0837, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.148415259062441, |
|
"grad_norm": 0.689831018447876, |
|
"learning_rate": 9.944103955863162e-06, |
|
"loss": 0.4116, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.1560068324160184, |
|
"grad_norm": 1.8963958024978638, |
|
"learning_rate": 9.94277884319116e-06, |
|
"loss": 0.1837, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.1635984057695956, |
|
"grad_norm": 0.024928750470280647, |
|
"learning_rate": 9.941438297313215e-06, |
|
"loss": 0.0743, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.1711899791231732, |
|
"grad_norm": 0.006995880510658026, |
|
"learning_rate": 9.940082322415008e-06, |
|
"loss": 0.0001, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.178781552476751, |
|
"grad_norm": 78.55364227294922, |
|
"learning_rate": 9.938710922730404e-06, |
|
"loss": 0.1252, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.1863731258303285, |
|
"grad_norm": 0.013810686767101288, |
|
"learning_rate": 9.937324102541424e-06, |
|
"loss": 0.0243, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.1939646991839057, |
|
"grad_norm": 0.007164845243096352, |
|
"learning_rate": 9.935921866178242e-06, |
|
"loss": 0.0583, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.2015562725374833, |
|
"grad_norm": 0.0043396539986133575, |
|
"learning_rate": 9.934504218019161e-06, |
|
"loss": 0.0862, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.209147845891061, |
|
"grad_norm": 0.007671877276152372, |
|
"learning_rate": 9.933071162490613e-06, |
|
"loss": 0.0016, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.2167394192446386, |
|
"grad_norm": 327.8991394042969, |
|
"learning_rate": 9.931622704067133e-06, |
|
"loss": 0.1624, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.224330992598216, |
|
"grad_norm": 16.11570167541504, |
|
"learning_rate": 9.93015884727135e-06, |
|
"loss": 0.2645, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.2319225659517934, |
|
"grad_norm": 0.005082719959318638, |
|
"learning_rate": 9.928679596673974e-06, |
|
"loss": 0.0002, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.239514139305371, |
|
"grad_norm": 0.01941937580704689, |
|
"learning_rate": 9.927184956893778e-06, |
|
"loss": 0.0612, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.2471057126589487, |
|
"grad_norm": 19.174551010131836, |
|
"learning_rate": 9.925674932597586e-06, |
|
"loss": 0.2042, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.2546972860125263, |
|
"grad_norm": 21.23321533203125, |
|
"learning_rate": 9.924149528500259e-06, |
|
"loss": 0.0703, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.2622888593661035, |
|
"grad_norm": 0.11990063637495041, |
|
"learning_rate": 9.922608749364684e-06, |
|
"loss": 0.1142, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.269880432719681, |
|
"grad_norm": 0.1152704656124115, |
|
"learning_rate": 9.921052600001746e-06, |
|
"loss": 0.102, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.2774720060732587, |
|
"grad_norm": 36.8327751159668, |
|
"learning_rate": 9.919481085270328e-06, |
|
"loss": 0.0215, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2850635794268364, |
|
"grad_norm": 0.06316674500703812, |
|
"learning_rate": 9.917894210077285e-06, |
|
"loss": 0.1024, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.292655152780414, |
|
"grad_norm": 0.04541470482945442, |
|
"learning_rate": 9.916291979377436e-06, |
|
"loss": 0.21, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.300246726133991, |
|
"grad_norm": 2.5551743507385254, |
|
"learning_rate": 9.914674398173548e-06, |
|
"loss": 0.0009, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.307838299487569, |
|
"grad_norm": 0.0514085479080677, |
|
"learning_rate": 9.913041471516311e-06, |
|
"loss": 0.0674, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.3154298728411464, |
|
"grad_norm": 0.09069258719682693, |
|
"learning_rate": 9.911393204504339e-06, |
|
"loss": 0.1548, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.3230214461947236, |
|
"grad_norm": 0.0353839211165905, |
|
"learning_rate": 9.909729602284131e-06, |
|
"loss": 0.1214, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.3306130195483012, |
|
"grad_norm": 0.006493726279586554, |
|
"learning_rate": 9.908050670050081e-06, |
|
"loss": 0.0039, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.338204592901879, |
|
"grad_norm": 0.009368511848151684, |
|
"learning_rate": 9.906356413044443e-06, |
|
"loss": 0.0779, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.3457961662554565, |
|
"grad_norm": 0.011731524951756, |
|
"learning_rate": 9.90464683655732e-06, |
|
"loss": 0.1077, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.353387739609034, |
|
"grad_norm": 63.11314392089844, |
|
"learning_rate": 9.902921945926653e-06, |
|
"loss": 0.2824, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.3609793129626113, |
|
"grad_norm": 0.0035196368116885424, |
|
"learning_rate": 9.901181746538196e-06, |
|
"loss": 0.0024, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.368570886316189, |
|
"grad_norm": 0.0374101847410202, |
|
"learning_rate": 9.8994262438255e-06, |
|
"loss": 0.012, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.3761624596697666, |
|
"grad_norm": 16.60328483581543, |
|
"learning_rate": 9.897833211571187e-06, |
|
"loss": 0.466, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.383754033023344, |
|
"grad_norm": 21.628568649291992, |
|
"learning_rate": 9.896048647683e-06, |
|
"loss": 0.1202, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.3913456063769214, |
|
"grad_norm": 15.491986274719238, |
|
"learning_rate": 9.894248796498034e-06, |
|
"loss": 0.056, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.398937179730499, |
|
"grad_norm": 0.009366643615067005, |
|
"learning_rate": 9.892433663636095e-06, |
|
"loss": 0.0003, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.4065287530840767, |
|
"grad_norm": 75.25447082519531, |
|
"learning_rate": 9.890603254764708e-06, |
|
"loss": 0.1785, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.4141203264376543, |
|
"grad_norm": 0.010000905022025108, |
|
"learning_rate": 9.888757575599095e-06, |
|
"loss": 0.1125, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.421711899791232, |
|
"grad_norm": 0.21319662034511566, |
|
"learning_rate": 9.886896631902156e-06, |
|
"loss": 0.0575, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.429303473144809, |
|
"grad_norm": 6.481915473937988, |
|
"learning_rate": 9.885020429484457e-06, |
|
"loss": 0.2689, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4368950464983867, |
|
"grad_norm": 0.20284566283226013, |
|
"learning_rate": 9.8831289742042e-06, |
|
"loss": 0.149, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.4444866198519644, |
|
"grad_norm": 1.3910574913024902, |
|
"learning_rate": 9.881222271967224e-06, |
|
"loss": 0.0142, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.452078193205542, |
|
"grad_norm": 0.09682253748178482, |
|
"learning_rate": 9.879300328726958e-06, |
|
"loss": 0.0021, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.459669766559119, |
|
"grad_norm": 0.005042471457272768, |
|
"learning_rate": 9.877363150484434e-06, |
|
"loss": 0.2168, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.467261339912697, |
|
"grad_norm": 64.47718811035156, |
|
"learning_rate": 9.875410743288246e-06, |
|
"loss": 0.1994, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.4748529132662744, |
|
"grad_norm": 0.2548009753227234, |
|
"learning_rate": 9.873443113234541e-06, |
|
"loss": 0.2271, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.482444486619852, |
|
"grad_norm": 0.008805714547634125, |
|
"learning_rate": 9.871460266466996e-06, |
|
"loss": 0.0827, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.4900360599734297, |
|
"grad_norm": 0.05888598784804344, |
|
"learning_rate": 9.8694622091768e-06, |
|
"loss": 0.054, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.497627633327007, |
|
"grad_norm": 0.004817333538085222, |
|
"learning_rate": 9.867448947602637e-06, |
|
"loss": 0.105, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.5052192066805845, |
|
"grad_norm": 0.04850906506180763, |
|
"learning_rate": 9.865420488030664e-06, |
|
"loss": 0.2363, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.512810780034162, |
|
"grad_norm": 0.14938922226428986, |
|
"learning_rate": 9.86337683679449e-06, |
|
"loss": 0.1593, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.5204023533877398, |
|
"grad_norm": 18.9013729095459, |
|
"learning_rate": 9.861318000275158e-06, |
|
"loss": 0.2351, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.527993926741317, |
|
"grad_norm": 0.025823410600423813, |
|
"learning_rate": 9.85924398490113e-06, |
|
"loss": 0.0022, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.5355855000948946, |
|
"grad_norm": 28.33924674987793, |
|
"learning_rate": 9.857154797148255e-06, |
|
"loss": 0.2312, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.543177073448472, |
|
"grad_norm": 0.001974069746211171, |
|
"learning_rate": 9.855050443539761e-06, |
|
"loss": 0.0002, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.55076864680205, |
|
"grad_norm": 21.997047424316406, |
|
"learning_rate": 9.852930930646228e-06, |
|
"loss": 0.1257, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.5583602201556275, |
|
"grad_norm": 0.48950299620628357, |
|
"learning_rate": 9.850796265085567e-06, |
|
"loss": 0.0062, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.5659517935092047, |
|
"grad_norm": 8.470258712768555, |
|
"learning_rate": 9.848646453523005e-06, |
|
"loss": 0.0585, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.5735433668627823, |
|
"grad_norm": 0.11571002751588821, |
|
"learning_rate": 9.846481502671056e-06, |
|
"loss": 0.0329, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.58113494021636, |
|
"grad_norm": 11.877908706665039, |
|
"learning_rate": 9.844301419289511e-06, |
|
"loss": 0.2921, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.588726513569937, |
|
"grad_norm": 36.33771896362305, |
|
"learning_rate": 9.842106210185403e-06, |
|
"loss": 0.2223, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.5963180869235147, |
|
"grad_norm": 2.979523181915283, |
|
"learning_rate": 9.839895882212997e-06, |
|
"loss": 0.0653, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.6039096602770924, |
|
"grad_norm": 0.013308779336512089, |
|
"learning_rate": 9.837670442273768e-06, |
|
"loss": 0.0735, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.61150123363067, |
|
"grad_norm": 111.11514282226562, |
|
"learning_rate": 9.835429897316367e-06, |
|
"loss": 0.1495, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.6190928069842476, |
|
"grad_norm": 0.007320565637201071, |
|
"learning_rate": 9.833174254336618e-06, |
|
"loss": 0.3018, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.6266843803378253, |
|
"grad_norm": 0.010831023566424847, |
|
"learning_rate": 9.830903520377482e-06, |
|
"loss": 0.0203, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.6342759536914024, |
|
"grad_norm": 18.389625549316406, |
|
"learning_rate": 9.82861770252904e-06, |
|
"loss": 0.1973, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.64186752704498, |
|
"grad_norm": 12.364988327026367, |
|
"learning_rate": 9.826316807928468e-06, |
|
"loss": 0.0988, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.6494591003985577, |
|
"grad_norm": 0.0008839545771479607, |
|
"learning_rate": 9.824000843760028e-06, |
|
"loss": 0.0552, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.657050673752135, |
|
"grad_norm": 0.028787225484848022, |
|
"learning_rate": 9.821669817255021e-06, |
|
"loss": 0.1918, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.6646422471057125, |
|
"grad_norm": 0.007524173706769943, |
|
"learning_rate": 9.819323735691787e-06, |
|
"loss": 0.0056, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.67223382045929, |
|
"grad_norm": 7.9602837562561035, |
|
"learning_rate": 9.816962606395668e-06, |
|
"loss": 0.1273, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.6798253938128678, |
|
"grad_norm": 15.868315696716309, |
|
"learning_rate": 9.814586436738998e-06, |
|
"loss": 0.0943, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.6874169671664454, |
|
"grad_norm": 0.2785890996456146, |
|
"learning_rate": 9.812195234141064e-06, |
|
"loss": 0.1291, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.695008540520023, |
|
"grad_norm": 63.62078857421875, |
|
"learning_rate": 9.809789006068097e-06, |
|
"loss": 0.0672, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.7026001138736, |
|
"grad_norm": 2.8807220458984375, |
|
"learning_rate": 9.807367760033245e-06, |
|
"loss": 0.217, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.710191687227178, |
|
"grad_norm": 36.00885009765625, |
|
"learning_rate": 9.80493150359654e-06, |
|
"loss": 0.1016, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.7177832605807555, |
|
"grad_norm": 0.021623503416776657, |
|
"learning_rate": 9.80248024436489e-06, |
|
"loss": 0.2195, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.7253748339343327, |
|
"grad_norm": 0.03640507906675339, |
|
"learning_rate": 9.800013989992042e-06, |
|
"loss": 0.0001, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.7329664072879103, |
|
"grad_norm": 27.120119094848633, |
|
"learning_rate": 9.797532748178566e-06, |
|
"loss": 0.4964, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.740557980641488, |
|
"grad_norm": 0.08877989649772644, |
|
"learning_rate": 9.795036526671828e-06, |
|
"loss": 0.0498, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.7481495539950656, |
|
"grad_norm": 0.0727711170911789, |
|
"learning_rate": 9.792525333265965e-06, |
|
"loss": 0.1452, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.755741127348643, |
|
"grad_norm": 0.21834716200828552, |
|
"learning_rate": 9.789999175801866e-06, |
|
"loss": 0.1315, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.763332700702221, |
|
"grad_norm": 3.933009147644043, |
|
"learning_rate": 9.787458062167135e-06, |
|
"loss": 0.0726, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.770924274055798, |
|
"grad_norm": 0.00495730759575963, |
|
"learning_rate": 9.784902000296084e-06, |
|
"loss": 0.0092, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.7785158474093756, |
|
"grad_norm": 0.06244872510433197, |
|
"learning_rate": 9.782330998169695e-06, |
|
"loss": 0.2204, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.7861074207629533, |
|
"grad_norm": 0.032471269369125366, |
|
"learning_rate": 9.779745063815598e-06, |
|
"loss": 0.0887, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.7936989941165304, |
|
"grad_norm": 0.0014243993209674954, |
|
"learning_rate": 9.777144205308049e-06, |
|
"loss": 0.1105, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.801290567470108, |
|
"grad_norm": 69.43852233886719, |
|
"learning_rate": 9.774528430767902e-06, |
|
"loss": 0.0603, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.8088821408236857, |
|
"grad_norm": 0.06080542132258415, |
|
"learning_rate": 9.771897748362583e-06, |
|
"loss": 0.0163, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.8164737141772633, |
|
"grad_norm": 0.09897174686193466, |
|
"learning_rate": 9.769252166306066e-06, |
|
"loss": 0.1167, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.824065287530841, |
|
"grad_norm": 0.20604291558265686, |
|
"learning_rate": 9.766591692858854e-06, |
|
"loss": 0.0706, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.831656860884418, |
|
"grad_norm": 32.105499267578125, |
|
"learning_rate": 9.763916336327935e-06, |
|
"loss": 0.5321, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.8392484342379958, |
|
"grad_norm": 0.00609110202640295, |
|
"learning_rate": 9.761226105066778e-06, |
|
"loss": 0.0794, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.8468400075915734, |
|
"grad_norm": 0.14252524077892303, |
|
"learning_rate": 9.75852100747529e-06, |
|
"loss": 0.1037, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.854431580945151, |
|
"grad_norm": 0.0007404695497825742, |
|
"learning_rate": 9.7558010519998e-06, |
|
"loss": 0.0552, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.8620231542987282, |
|
"grad_norm": 0.007310529239475727, |
|
"learning_rate": 9.753066247133025e-06, |
|
"loss": 0.009, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.869614727652306, |
|
"grad_norm": 88.26655578613281, |
|
"learning_rate": 9.750316601414051e-06, |
|
"loss": 0.1008, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.8772063010058835, |
|
"grad_norm": 0.01418048795312643, |
|
"learning_rate": 9.7475521234283e-06, |
|
"loss": 0.0262, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.884797874359461, |
|
"grad_norm": 0.06487419456243515, |
|
"learning_rate": 9.744772821807509e-06, |
|
"loss": 0.1206, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.8923894477130387, |
|
"grad_norm": 0.0070535228587687016, |
|
"learning_rate": 9.741978705229697e-06, |
|
"loss": 0.0897, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.899981021066616, |
|
"grad_norm": 1.5489246845245361, |
|
"learning_rate": 9.739169782419143e-06, |
|
"loss": 0.0008, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.9075725944201936, |
|
"grad_norm": 0.001165062771178782, |
|
"learning_rate": 9.736346062146356e-06, |
|
"loss": 0.0239, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.915164167773771, |
|
"grad_norm": 0.0013667664024978876, |
|
"learning_rate": 9.733507553228045e-06, |
|
"loss": 0.0017, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.9227557411273484, |
|
"grad_norm": 0.004272387828677893, |
|
"learning_rate": 9.7306542645271e-06, |
|
"loss": 0.1874, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.930347314480926, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 9.727786204952554e-06, |
|
"loss": 0.0128, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.9379388878345036, |
|
"grad_norm": 0.010683764703571796, |
|
"learning_rate": 9.724903383459566e-06, |
|
"loss": 0.064, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.9455304611880813, |
|
"grad_norm": 11.981929779052734, |
|
"learning_rate": 9.722005809049382e-06, |
|
"loss": 0.2962, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.953122034541659, |
|
"grad_norm": 7.638548374176025, |
|
"learning_rate": 9.719093490769315e-06, |
|
"loss": 0.2084, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.9607136078952365, |
|
"grad_norm": 0.0027020114939659834, |
|
"learning_rate": 9.71616643771271e-06, |
|
"loss": 0.0521, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.9683051812488137, |
|
"grad_norm": 0.041696127504110336, |
|
"learning_rate": 9.713224659018927e-06, |
|
"loss": 0.1488, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.9758967546023913, |
|
"grad_norm": 276.02947998046875, |
|
"learning_rate": 9.710268163873298e-06, |
|
"loss": 0.1649, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.983488327955969, |
|
"grad_norm": 4.512789726257324, |
|
"learning_rate": 9.707296961507107e-06, |
|
"loss": 0.0364, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.991079901309546, |
|
"grad_norm": 0.07038887590169907, |
|
"learning_rate": 9.70431106119756e-06, |
|
"loss": 0.046, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.998671474663124, |
|
"grad_norm": 52.16018295288086, |
|
"learning_rate": 9.701310472267757e-06, |
|
"loss": 0.1439, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.999430631998482, |
|
"eval_f1": 0.9468203897167411, |
|
"eval_loss": 0.17982631921768188, |
|
"eval_precision": 0.9451027269774426, |
|
"eval_recall": 0.9468892261001517, |
|
"eval_runtime": 75.734, |
|
"eval_samples_per_second": 17.403, |
|
"eval_steps_per_second": 17.403, |
|
"step": 3951 |
|
}, |
|
{ |
|
"epoch": 3.0062630480167014, |
|
"grad_norm": 0.003164840629324317, |
|
"learning_rate": 9.69829520408666e-06, |
|
"loss": 0.0687, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 3.013854621370279, |
|
"grad_norm": 0.005421197507530451, |
|
"learning_rate": 9.695265266069066e-06, |
|
"loss": 0.1768, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 3.0214461947238567, |
|
"grad_norm": 0.07668659836053848, |
|
"learning_rate": 9.692220667675572e-06, |
|
"loss": 0.0092, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 3.029037768077434, |
|
"grad_norm": 0.0020935048814862967, |
|
"learning_rate": 9.689161418412557e-06, |
|
"loss": 0.2435, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.0366293414310115, |
|
"grad_norm": 0.012631943449378014, |
|
"learning_rate": 9.68608752783214e-06, |
|
"loss": 0.0858, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.044220914784589, |
|
"grad_norm": 0.005341747775673866, |
|
"learning_rate": 9.682999005532161e-06, |
|
"loss": 0.0094, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 3.0518124881381667, |
|
"grad_norm": 0.02143806405365467, |
|
"learning_rate": 9.67989586115614e-06, |
|
"loss": 0.0031, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 3.0594040614917444, |
|
"grad_norm": 13.902883529663086, |
|
"learning_rate": 9.67677810439326e-06, |
|
"loss": 0.0965, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.0669956348453216, |
|
"grad_norm": 0.20893624424934387, |
|
"learning_rate": 9.67364574497832e-06, |
|
"loss": 0.107, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 3.074587208198899, |
|
"grad_norm": 0.18238410353660583, |
|
"learning_rate": 9.67049879269172e-06, |
|
"loss": 0.001, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.082178781552477, |
|
"grad_norm": 0.022665822878479958, |
|
"learning_rate": 9.667337257359425e-06, |
|
"loss": 0.1673, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 3.0897703549060545, |
|
"grad_norm": 10.807044982910156, |
|
"learning_rate": 9.664161148852932e-06, |
|
"loss": 0.0674, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 3.0973619282596316, |
|
"grad_norm": 0.0026043581310659647, |
|
"learning_rate": 9.660970477089238e-06, |
|
"loss": 0.0097, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 3.1049535016132093, |
|
"grad_norm": 18.194334030151367, |
|
"learning_rate": 9.657765252030815e-06, |
|
"loss": 0.0064, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 3.112545074966787, |
|
"grad_norm": 12.572392463684082, |
|
"learning_rate": 9.654545483685578e-06, |
|
"loss": 0.1343, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.1201366483203645, |
|
"grad_norm": 0.0007624260615557432, |
|
"learning_rate": 9.651311182106848e-06, |
|
"loss": 0.0325, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 3.1277282216739417, |
|
"grad_norm": 0.018368422985076904, |
|
"learning_rate": 9.648062357393325e-06, |
|
"loss": 0.0005, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.1353197950275193, |
|
"grad_norm": 78.8929443359375, |
|
"learning_rate": 9.644799019689056e-06, |
|
"loss": 0.054, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 3.142911368381097, |
|
"grad_norm": 0.010049775242805481, |
|
"learning_rate": 9.641521179183403e-06, |
|
"loss": 0.0157, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.1505029417346746, |
|
"grad_norm": 91.76640319824219, |
|
"learning_rate": 9.638228846111011e-06, |
|
"loss": 0.1893, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.1580945150882522, |
|
"grad_norm": 0.30123358964920044, |
|
"learning_rate": 9.634922030751777e-06, |
|
"loss": 0.2819, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.1656860884418294, |
|
"grad_norm": 32.838623046875, |
|
"learning_rate": 9.631600743430817e-06, |
|
"loss": 0.2494, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 3.173277661795407, |
|
"grad_norm": 0.1474120020866394, |
|
"learning_rate": 9.628264994518431e-06, |
|
"loss": 0.0401, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.1808692351489847, |
|
"grad_norm": 0.16810506582260132, |
|
"learning_rate": 9.624914794430078e-06, |
|
"loss": 0.0668, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.1884608085025623, |
|
"grad_norm": 1.5835288763046265, |
|
"learning_rate": 9.621550153626338e-06, |
|
"loss": 0.1177, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.1960523818561395, |
|
"grad_norm": 0.00022748277115169913, |
|
"learning_rate": 9.618171082612875e-06, |
|
"loss": 0.006, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.203643955209717, |
|
"grad_norm": 0.011720534414052963, |
|
"learning_rate": 9.614777591940419e-06, |
|
"loss": 0.0547, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.2112355285632947, |
|
"grad_norm": 16.759693145751953, |
|
"learning_rate": 9.611369692204712e-06, |
|
"loss": 0.0687, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.2188271019168724, |
|
"grad_norm": 13.746438026428223, |
|
"learning_rate": 9.6079473940465e-06, |
|
"loss": 0.1731, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.22641867527045, |
|
"grad_norm": 1.0661725997924805, |
|
"learning_rate": 9.604510708151472e-06, |
|
"loss": 0.0012, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.234010248624027, |
|
"grad_norm": 0.0051275817677378654, |
|
"learning_rate": 9.601059645250253e-06, |
|
"loss": 0.1559, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.241601821977605, |
|
"grad_norm": 0.03845924511551857, |
|
"learning_rate": 9.59759421611835e-06, |
|
"loss": 0.0414, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.2491933953311825, |
|
"grad_norm": 0.2744313180446625, |
|
"learning_rate": 9.594114431576133e-06, |
|
"loss": 0.2521, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.25678496868476, |
|
"grad_norm": 0.06969039887189865, |
|
"learning_rate": 9.590620302488792e-06, |
|
"loss": 0.1007, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.2643765420383373, |
|
"grad_norm": 0.044375017285346985, |
|
"learning_rate": 9.587111839766303e-06, |
|
"loss": 0.1706, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.271968115391915, |
|
"grad_norm": 0.008467442356050014, |
|
"learning_rate": 9.583589054363402e-06, |
|
"loss": 0.0518, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.2795596887454925, |
|
"grad_norm": 0.006757930386811495, |
|
"learning_rate": 9.580051957279545e-06, |
|
"loss": 0.1301, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.28715126209907, |
|
"grad_norm": 0.22480565309524536, |
|
"learning_rate": 9.57650055955887e-06, |
|
"loss": 0.2225, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.294742835452648, |
|
"grad_norm": 0.005938298534601927, |
|
"learning_rate": 9.572934872290175e-06, |
|
"loss": 0.1615, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.302334408806225, |
|
"grad_norm": 0.031019240617752075, |
|
"learning_rate": 9.569354906606864e-06, |
|
"loss": 0.0292, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.3099259821598026, |
|
"grad_norm": 0.058189138770103455, |
|
"learning_rate": 9.565760673686936e-06, |
|
"loss": 0.1437, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.3175175555133802, |
|
"grad_norm": 18.81794548034668, |
|
"learning_rate": 9.56215218475293e-06, |
|
"loss": 0.1732, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.325109128866958, |
|
"grad_norm": 0.037775713950395584, |
|
"learning_rate": 9.558529451071896e-06, |
|
"loss": 0.0048, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.332700702220535, |
|
"grad_norm": 0.014422253705561161, |
|
"learning_rate": 9.55489248395537e-06, |
|
"loss": 0.0021, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.3402922755741127, |
|
"grad_norm": 30.743995666503906, |
|
"learning_rate": 9.551241294759322e-06, |
|
"loss": 0.238, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.3478838489276903, |
|
"grad_norm": 1.6870224475860596, |
|
"learning_rate": 9.547575894884132e-06, |
|
"loss": 0.09, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.355475422281268, |
|
"grad_norm": 0.03549875691533089, |
|
"learning_rate": 9.54389629577455e-06, |
|
"loss": 0.163, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.3630669956348456, |
|
"grad_norm": 0.12179459631443024, |
|
"learning_rate": 9.540202508919663e-06, |
|
"loss": 0.0025, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.3706585689884228, |
|
"grad_norm": 0.000569705618545413, |
|
"learning_rate": 9.536494545852854e-06, |
|
"loss": 0.0433, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.3782501423420004, |
|
"grad_norm": 0.0051111155189573765, |
|
"learning_rate": 9.532772418151777e-06, |
|
"loss": 0.1015, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.385841715695578, |
|
"grad_norm": 0.0955556184053421, |
|
"learning_rate": 9.529036137438304e-06, |
|
"loss": 0.2303, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.393433289049155, |
|
"grad_norm": 0.02819570153951645, |
|
"learning_rate": 9.5252857153785e-06, |
|
"loss": 0.0003, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.401024862402733, |
|
"grad_norm": 0.005423153750598431, |
|
"learning_rate": 9.521521163682593e-06, |
|
"loss": 0.0102, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.4086164357563105, |
|
"grad_norm": 0.8613097667694092, |
|
"learning_rate": 9.517742494104918e-06, |
|
"loss": 0.0005, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 3.416208009109888, |
|
"grad_norm": 0.2508643567562103, |
|
"learning_rate": 9.513949718443898e-06, |
|
"loss": 0.0711, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.4237995824634657, |
|
"grad_norm": 0.026635829359292984, |
|
"learning_rate": 9.510142848541998e-06, |
|
"loss": 0.0596, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 3.431391155817043, |
|
"grad_norm": 0.0043787783943116665, |
|
"learning_rate": 9.50632189628569e-06, |
|
"loss": 0.3671, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.4389827291706205, |
|
"grad_norm": 0.05850038304924965, |
|
"learning_rate": 9.502486873605419e-06, |
|
"loss": 0.1132, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 3.446574302524198, |
|
"grad_norm": 157.52146911621094, |
|
"learning_rate": 9.49863779247556e-06, |
|
"loss": 0.1559, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.454165875877776, |
|
"grad_norm": 0.02441789209842682, |
|
"learning_rate": 9.494774664914385e-06, |
|
"loss": 0.0658, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.461757449231353, |
|
"grad_norm": 1.3454347848892212, |
|
"learning_rate": 9.490897502984028e-06, |
|
"loss": 0.0128, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.4693490225849306, |
|
"grad_norm": 0.012022917158901691, |
|
"learning_rate": 9.487006318790435e-06, |
|
"loss": 0.0266, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.4769405959385082, |
|
"grad_norm": 0.01288307923823595, |
|
"learning_rate": 9.483101124483345e-06, |
|
"loss": 0.0001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.484532169292086, |
|
"grad_norm": 26.168624877929688, |
|
"learning_rate": 9.479181932256232e-06, |
|
"loss": 0.0258, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.4921237426456635, |
|
"grad_norm": 0.004901974927634001, |
|
"learning_rate": 9.475248754346282e-06, |
|
"loss": 0.1046, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.4997153159992407, |
|
"grad_norm": 0.001919193658977747, |
|
"learning_rate": 9.471301603034353e-06, |
|
"loss": 0.0766, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.5073068893528183, |
|
"grad_norm": 0.030080076307058334, |
|
"learning_rate": 9.467340490644923e-06, |
|
"loss": 0.0022, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.514898462706396, |
|
"grad_norm": 0.041573066264390945, |
|
"learning_rate": 9.463365429546073e-06, |
|
"loss": 0.0357, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.5224900360599736, |
|
"grad_norm": 30.251873016357422, |
|
"learning_rate": 9.459376432149429e-06, |
|
"loss": 0.0533, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.5300816094135508, |
|
"grad_norm": 58.92287826538086, |
|
"learning_rate": 9.455373510910135e-06, |
|
"loss": 0.1241, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.5376731827671284, |
|
"grad_norm": 0.015299913473427296, |
|
"learning_rate": 9.45135667832681e-06, |
|
"loss": 0.0672, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.545264756120706, |
|
"grad_norm": 0.024773746728897095, |
|
"learning_rate": 9.447325946941509e-06, |
|
"loss": 0.0002, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.5528563294742836, |
|
"grad_norm": 0.0013335061958059669, |
|
"learning_rate": 9.443281329339682e-06, |
|
"loss": 0.0002, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.5604479028278613, |
|
"grad_norm": 0.003542415564879775, |
|
"learning_rate": 9.439222838150141e-06, |
|
"loss": 0.0053, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.5680394761814385, |
|
"grad_norm": 0.004198325797915459, |
|
"learning_rate": 9.435150486045019e-06, |
|
"loss": 0.0021, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.575631049535016, |
|
"grad_norm": 0.012465923093259335, |
|
"learning_rate": 9.431064285739717e-06, |
|
"loss": 0.391, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.5832226228885937, |
|
"grad_norm": 19.51753044128418, |
|
"learning_rate": 9.426964249992885e-06, |
|
"loss": 0.0163, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.5908141962421714, |
|
"grad_norm": 15.74682903289795, |
|
"learning_rate": 9.42285039160637e-06, |
|
"loss": 0.1393, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.5984057695957485, |
|
"grad_norm": 0.001853258814662695, |
|
"learning_rate": 9.418722723425179e-06, |
|
"loss": 0.1333, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.605997342949326, |
|
"grad_norm": 0.00429703202098608, |
|
"learning_rate": 9.414581258337433e-06, |
|
"loss": 0.041, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.613588916302904, |
|
"grad_norm": 0.019961683079600334, |
|
"learning_rate": 9.410426009274343e-06, |
|
"loss": 0.0041, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.6211804896564814, |
|
"grad_norm": 0.003665096592158079, |
|
"learning_rate": 9.406256989210146e-06, |
|
"loss": 0.1252, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.628772063010059, |
|
"grad_norm": 59.87676239013672, |
|
"learning_rate": 9.402074211162086e-06, |
|
"loss": 0.2175, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 0.0013629102613776922, |
|
"learning_rate": 9.397877688190362e-06, |
|
"loss": 0.026, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.643955209717214, |
|
"grad_norm": 0.004092271439731121, |
|
"learning_rate": 9.39366743339809e-06, |
|
"loss": 0.0061, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.6515467830707915, |
|
"grad_norm": 0.06597864627838135, |
|
"learning_rate": 9.38944345993126e-06, |
|
"loss": 0.0974, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.6591383564243687, |
|
"grad_norm": 0.0014479252276942134, |
|
"learning_rate": 9.3852057809787e-06, |
|
"loss": 0.1248, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.6667299297779463, |
|
"grad_norm": 0.0007850687834434211, |
|
"learning_rate": 9.380954409772029e-06, |
|
"loss": 0.0674, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.674321503131524, |
|
"grad_norm": 0.009199988096952438, |
|
"learning_rate": 9.376689359585623e-06, |
|
"loss": 0.0707, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.6819130764851016, |
|
"grad_norm": 0.001353310770355165, |
|
"learning_rate": 9.37241064373656e-06, |
|
"loss": 0.0001, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.689504649838679, |
|
"grad_norm": 0.0004105101979803294, |
|
"learning_rate": 9.368118275584596e-06, |
|
"loss": 0.0161, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.697096223192257, |
|
"grad_norm": 0.005007717292755842, |
|
"learning_rate": 9.36381226853211e-06, |
|
"loss": 0.0854, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.704687796545834, |
|
"grad_norm": 0.001610257662832737, |
|
"learning_rate": 9.359492636024067e-06, |
|
"loss": 0.0002, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.7122793698994117, |
|
"grad_norm": 0.0029359892942011356, |
|
"learning_rate": 9.35515939154798e-06, |
|
"loss": 0.0001, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.7198709432529893, |
|
"grad_norm": 0.016431191936135292, |
|
"learning_rate": 9.350812548633862e-06, |
|
"loss": 0.0407, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.7274625166065665, |
|
"grad_norm": 0.00021083364845253527, |
|
"learning_rate": 9.346452120854176e-06, |
|
"loss": 0.0001, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.735054089960144, |
|
"grad_norm": 0.0014973161742091179, |
|
"learning_rate": 9.342078121823817e-06, |
|
"loss": 0.2248, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.7426456633137217, |
|
"grad_norm": 0.01354212500154972, |
|
"learning_rate": 9.337690565200042e-06, |
|
"loss": 0.07, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.7502372366672994, |
|
"grad_norm": 0.07265155762434006, |
|
"learning_rate": 9.333289464682452e-06, |
|
"loss": 0.0486, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.757828810020877, |
|
"grad_norm": 0.0004681596765294671, |
|
"learning_rate": 9.328874834012925e-06, |
|
"loss": 0.0063, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.7654203833744546, |
|
"grad_norm": 0.01314933318644762, |
|
"learning_rate": 9.324446686975592e-06, |
|
"loss": 0.0853, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.773011956728032, |
|
"grad_norm": 0.00873385276645422, |
|
"learning_rate": 9.320005037396787e-06, |
|
"loss": 0.0936, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 3.7806035300816094, |
|
"grad_norm": 10.59278678894043, |
|
"learning_rate": 9.315549899145001e-06, |
|
"loss": 0.1606, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.788195103435187, |
|
"grad_norm": 0.0031807045452296734, |
|
"learning_rate": 9.311081286130846e-06, |
|
"loss": 0.1216, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 3.7957866767887642, |
|
"grad_norm": 0.15921778976917267, |
|
"learning_rate": 9.306599212307001e-06, |
|
"loss": 0.1834, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.803378250142342, |
|
"grad_norm": 0.24746917188167572, |
|
"learning_rate": 9.302103691668182e-06, |
|
"loss": 0.0025, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 3.8109698234959195, |
|
"grad_norm": 23.347986221313477, |
|
"learning_rate": 9.297594738251085e-06, |
|
"loss": 0.155, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.818561396849497, |
|
"grad_norm": 10.753530502319336, |
|
"learning_rate": 9.293072366134353e-06, |
|
"loss": 0.1938, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 3.8261529702030748, |
|
"grad_norm": 11.585359573364258, |
|
"learning_rate": 9.288536589438523e-06, |
|
"loss": 0.0768, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.833744543556652, |
|
"grad_norm": 0.035775743424892426, |
|
"learning_rate": 9.283987422325988e-06, |
|
"loss": 0.0124, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.8413361169102296, |
|
"grad_norm": 0.008631790988147259, |
|
"learning_rate": 9.279424879000948e-06, |
|
"loss": 0.0634, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.848927690263807, |
|
"grad_norm": 8.152615547180176, |
|
"learning_rate": 9.274848973709378e-06, |
|
"loss": 0.0008, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 3.856519263617385, |
|
"grad_norm": 0.00742849987000227, |
|
"learning_rate": 9.270259720738962e-06, |
|
"loss": 0.0023, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.864110836970962, |
|
"grad_norm": 0.00474806921556592, |
|
"learning_rate": 9.265657134419068e-06, |
|
"loss": 0.0822, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 3.8717024103245397, |
|
"grad_norm": 0.04680832102894783, |
|
"learning_rate": 9.261041229120693e-06, |
|
"loss": 0.4435, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.8792939836781173, |
|
"grad_norm": 0.05589527264237404, |
|
"learning_rate": 9.25641201925642e-06, |
|
"loss": 0.0161, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 3.886885557031695, |
|
"grad_norm": 0.0864788219332695, |
|
"learning_rate": 9.251769519280377e-06, |
|
"loss": 0.0042, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.8944771303852725, |
|
"grad_norm": 0.0046981326304376125, |
|
"learning_rate": 9.247113743688188e-06, |
|
"loss": 0.0202, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.9020687037388497, |
|
"grad_norm": 0.008091968484222889, |
|
"learning_rate": 9.242444707016924e-06, |
|
"loss": 0.0255, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.9096602770924274, |
|
"grad_norm": 0.016733279451727867, |
|
"learning_rate": 9.237762423845067e-06, |
|
"loss": 0.0609, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.917251850446005, |
|
"grad_norm": 110.93751525878906, |
|
"learning_rate": 9.233066908792459e-06, |
|
"loss": 0.0854, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.9248434237995826, |
|
"grad_norm": 0.0014931544428691268, |
|
"learning_rate": 9.228358176520256e-06, |
|
"loss": 0.5116, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 3.93243499715316, |
|
"grad_norm": 0.013354528695344925, |
|
"learning_rate": 9.22363624173088e-06, |
|
"loss": 0.1488, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.9400265705067374, |
|
"grad_norm": 0.00550916837528348, |
|
"learning_rate": 9.218901119167983e-06, |
|
"loss": 0.3537, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 3.947618143860315, |
|
"grad_norm": 29.100811004638672, |
|
"learning_rate": 9.214152823616385e-06, |
|
"loss": 0.2662, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.9552097172138927, |
|
"grad_norm": 0.0014990021008998156, |
|
"learning_rate": 9.209391369902048e-06, |
|
"loss": 0.2909, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.9628012905674703, |
|
"grad_norm": 0.2769727110862732, |
|
"learning_rate": 9.20461677289201e-06, |
|
"loss": 0.131, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.9703928639210475, |
|
"grad_norm": 0.04668630287051201, |
|
"learning_rate": 9.199829047494351e-06, |
|
"loss": 0.001, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.977984437274625, |
|
"grad_norm": 0.005737427622079849, |
|
"learning_rate": 9.195028208658143e-06, |
|
"loss": 0.1876, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.9855760106282028, |
|
"grad_norm": 0.0012742755934596062, |
|
"learning_rate": 9.190214271373399e-06, |
|
"loss": 0.0296, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.99316758398178, |
|
"grad_norm": 0.23183897137641907, |
|
"learning_rate": 9.185387250671037e-06, |
|
"loss": 0.0464, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9484240795008525, |
|
"eval_loss": 0.1520499438047409, |
|
"eval_precision": 0.9488593551067371, |
|
"eval_recall": 0.9484066767830045, |
|
"eval_runtime": 75.764, |
|
"eval_samples_per_second": 17.396, |
|
"eval_steps_per_second": 17.396, |
|
"step": 5269 |
|
}, |
|
{ |
|
"epoch": 4.000759157335358, |
|
"grad_norm": 0.015655217692255974, |
|
"learning_rate": 9.18054716162282e-06, |
|
"loss": 0.0778, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 4.008350730688935, |
|
"grad_norm": 51.39549255371094, |
|
"learning_rate": 9.175694019341321e-06, |
|
"loss": 0.1821, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 4.015942304042513, |
|
"grad_norm": 20.591053009033203, |
|
"learning_rate": 9.170827838979864e-06, |
|
"loss": 0.0411, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 4.0235338773960905, |
|
"grad_norm": 0.00464022858068347, |
|
"learning_rate": 9.165948635732487e-06, |
|
"loss": 0.024, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 4.031125450749668, |
|
"grad_norm": 0.9038947820663452, |
|
"learning_rate": 9.161056424833888e-06, |
|
"loss": 0.1133, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 4.038717024103246, |
|
"grad_norm": 104.494384765625, |
|
"learning_rate": 9.156151221559384e-06, |
|
"loss": 0.0752, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 4.046308597456823, |
|
"grad_norm": 0.003295379225164652, |
|
"learning_rate": 9.151233041224851e-06, |
|
"loss": 0.0697, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 4.0539001708104, |
|
"grad_norm": 0.0672566145658493, |
|
"learning_rate": 9.146301899186696e-06, |
|
"loss": 0.0149, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 4.061491744163978, |
|
"grad_norm": 0.020139316096901894, |
|
"learning_rate": 9.141357810841785e-06, |
|
"loss": 0.0004, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 4.069083317517555, |
|
"grad_norm": 0.18405619263648987, |
|
"learning_rate": 9.136400791627414e-06, |
|
"loss": 0.0003, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 4.076674890871133, |
|
"grad_norm": 0.011098051443696022, |
|
"learning_rate": 9.131430857021252e-06, |
|
"loss": 0.1502, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 4.084266464224711, |
|
"grad_norm": 0.0007754967082291842, |
|
"learning_rate": 9.126448022541296e-06, |
|
"loss": 0.1435, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 4.091858037578288, |
|
"grad_norm": 0.059689611196517944, |
|
"learning_rate": 9.121452303745823e-06, |
|
"loss": 0.2681, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 4.099449610931866, |
|
"grad_norm": 23.187213897705078, |
|
"learning_rate": 9.116443716233336e-06, |
|
"loss": 0.0408, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 4.1070411842854435, |
|
"grad_norm": 0.022440658882260323, |
|
"learning_rate": 9.111422275642518e-06, |
|
"loss": 0.0499, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 4.11463275763902, |
|
"grad_norm": 0.04940136522054672, |
|
"learning_rate": 9.10638799765219e-06, |
|
"loss": 0.0007, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 4.122224330992598, |
|
"grad_norm": 0.0109120924025774, |
|
"learning_rate": 9.101340897981247e-06, |
|
"loss": 0.0577, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 4.1298159043461755, |
|
"grad_norm": 15.833015441894531, |
|
"learning_rate": 9.096280992388629e-06, |
|
"loss": 0.0016, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 4.137407477699753, |
|
"grad_norm": 0.002290463075041771, |
|
"learning_rate": 9.091208296673253e-06, |
|
"loss": 0.0022, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 4.144999051053331, |
|
"grad_norm": 0.006408984772861004, |
|
"learning_rate": 9.086122826673976e-06, |
|
"loss": 0.0004, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 4.152590624406908, |
|
"grad_norm": 0.04329880699515343, |
|
"learning_rate": 9.081024598269537e-06, |
|
"loss": 0.0001, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 4.160182197760486, |
|
"grad_norm": 0.0005604320904240012, |
|
"learning_rate": 9.075913627378515e-06, |
|
"loss": 0.1444, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 4.167773771114064, |
|
"grad_norm": 0.0035607500467449427, |
|
"learning_rate": 9.070789929959273e-06, |
|
"loss": 0.0705, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 4.175365344467641, |
|
"grad_norm": 21.509424209594727, |
|
"learning_rate": 9.065653522009914e-06, |
|
"loss": 0.0963, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.182956917821218, |
|
"grad_norm": 0.040827080607414246, |
|
"learning_rate": 9.060504419568226e-06, |
|
"loss": 0.2367, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 4.190548491174796, |
|
"grad_norm": 0.03268290311098099, |
|
"learning_rate": 9.055342638711636e-06, |
|
"loss": 0.1356, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 4.198140064528373, |
|
"grad_norm": 0.02690727449953556, |
|
"learning_rate": 9.050168195557152e-06, |
|
"loss": 0.1927, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 4.205731637881951, |
|
"grad_norm": 0.0010843976633623242, |
|
"learning_rate": 9.044981106261327e-06, |
|
"loss": 0.03, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 4.2133232112355286, |
|
"grad_norm": 0.017938513308763504, |
|
"learning_rate": 9.039781387020195e-06, |
|
"loss": 0.0011, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 4.220914784589106, |
|
"grad_norm": 0.11831680685281754, |
|
"learning_rate": 9.034569054069222e-06, |
|
"loss": 0.0028, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 4.228506357942684, |
|
"grad_norm": 0.0017340222839266062, |
|
"learning_rate": 9.029344123683269e-06, |
|
"loss": 0.0004, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 4.236097931296261, |
|
"grad_norm": 45.62750244140625, |
|
"learning_rate": 9.024106612176519e-06, |
|
"loss": 0.199, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 4.243689504649839, |
|
"grad_norm": 0.00023749677347950637, |
|
"learning_rate": 9.019382108477498e-06, |
|
"loss": 0.0737, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 4.251281078003416, |
|
"grad_norm": 0.0017125029116868973, |
|
"learning_rate": 9.014120737927479e-06, |
|
"loss": 0.0038, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.258872651356993, |
|
"grad_norm": 0.005647186189889908, |
|
"learning_rate": 9.008846833789777e-06, |
|
"loss": 0.0524, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 4.266464224710571, |
|
"grad_norm": 0.02812052331864834, |
|
"learning_rate": 9.003560412531492e-06, |
|
"loss": 0.0008, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 4.274055798064149, |
|
"grad_norm": 0.004697522614151239, |
|
"learning_rate": 8.99826149065881e-06, |
|
"loss": 0.022, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 4.281647371417726, |
|
"grad_norm": 0.000999168842099607, |
|
"learning_rate": 8.992950084716952e-06, |
|
"loss": 0.0255, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 4.289238944771304, |
|
"grad_norm": 0.00024819112149998546, |
|
"learning_rate": 8.987626211290112e-06, |
|
"loss": 0.1814, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 4.296830518124882, |
|
"grad_norm": 15.028079986572266, |
|
"learning_rate": 8.982289887001419e-06, |
|
"loss": 0.0483, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 4.304422091478459, |
|
"grad_norm": 0.012629321776330471, |
|
"learning_rate": 8.976941128512873e-06, |
|
"loss": 0.0727, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 4.312013664832037, |
|
"grad_norm": 0.02232271246612072, |
|
"learning_rate": 8.9715799525253e-06, |
|
"loss": 0.1076, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 4.319605238185614, |
|
"grad_norm": 0.013221162371337414, |
|
"learning_rate": 8.966206375778302e-06, |
|
"loss": 0.1304, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 4.327196811539191, |
|
"grad_norm": 20.240745544433594, |
|
"learning_rate": 8.960820415050193e-06, |
|
"loss": 0.0818, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.334788384892769, |
|
"grad_norm": 0.9472859501838684, |
|
"learning_rate": 8.955422087157962e-06, |
|
"loss": 0.0875, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 4.3423799582463465, |
|
"grad_norm": 0.24365593492984772, |
|
"learning_rate": 8.950011408957206e-06, |
|
"loss": 0.0052, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 4.349971531599924, |
|
"grad_norm": 0.5765083432197571, |
|
"learning_rate": 8.944588397342093e-06, |
|
"loss": 0.3057, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 4.357563104953502, |
|
"grad_norm": 36.48699951171875, |
|
"learning_rate": 8.939153069245291e-06, |
|
"loss": 0.1687, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 4.365154678307079, |
|
"grad_norm": 0.011977112852036953, |
|
"learning_rate": 8.933705441637931e-06, |
|
"loss": 0.0129, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 4.372746251660657, |
|
"grad_norm": 0.049162607640028, |
|
"learning_rate": 8.928245531529546e-06, |
|
"loss": 0.0747, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 4.380337825014234, |
|
"grad_norm": 0.006424940191209316, |
|
"learning_rate": 8.922773355968018e-06, |
|
"loss": 0.0001, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 4.387929398367811, |
|
"grad_norm": 0.0021049147471785545, |
|
"learning_rate": 8.91728893203953e-06, |
|
"loss": 0.0011, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 4.395520971721389, |
|
"grad_norm": 0.005935146939009428, |
|
"learning_rate": 8.911792276868502e-06, |
|
"loss": 0.0685, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 4.403112545074967, |
|
"grad_norm": 0.16192130744457245, |
|
"learning_rate": 8.906283407617555e-06, |
|
"loss": 0.0789, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.410704118428544, |
|
"grad_norm": 0.0363471657037735, |
|
"learning_rate": 8.900762341487439e-06, |
|
"loss": 0.0003, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 4.418295691782122, |
|
"grad_norm": 0.03035406582057476, |
|
"learning_rate": 8.895229095716988e-06, |
|
"loss": 0.0004, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.4258872651356995, |
|
"grad_norm": 0.0051777479238808155, |
|
"learning_rate": 8.889683687583067e-06, |
|
"loss": 0.0974, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 4.433478838489277, |
|
"grad_norm": 0.001428132993169129, |
|
"learning_rate": 8.884126134400516e-06, |
|
"loss": 0.0104, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 4.441070411842855, |
|
"grad_norm": 0.029337646439671516, |
|
"learning_rate": 8.8785564535221e-06, |
|
"loss": 0.1961, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.448661985196432, |
|
"grad_norm": 103.57210540771484, |
|
"learning_rate": 8.872974662338443e-06, |
|
"loss": 0.0941, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 4.456253558550009, |
|
"grad_norm": 0.006421659607440233, |
|
"learning_rate": 8.86738077827799e-06, |
|
"loss": 0.0586, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 4.463845131903587, |
|
"grad_norm": 0.21757641434669495, |
|
"learning_rate": 8.861774818806939e-06, |
|
"loss": 0.1107, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 4.471436705257164, |
|
"grad_norm": 0.2700095474720001, |
|
"learning_rate": 8.856156801429196e-06, |
|
"loss": 0.1388, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 4.479028278610742, |
|
"grad_norm": 0.0029901862144470215, |
|
"learning_rate": 8.850526743686314e-06, |
|
"loss": 0.1908, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.48661985196432, |
|
"grad_norm": 0.008274559862911701, |
|
"learning_rate": 8.844884663157441e-06, |
|
"loss": 0.0842, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 4.494211425317897, |
|
"grad_norm": 0.006725401151925325, |
|
"learning_rate": 8.83923057745926e-06, |
|
"loss": 0.0003, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 4.501802998671475, |
|
"grad_norm": 13.423134803771973, |
|
"learning_rate": 8.833564504245953e-06, |
|
"loss": 0.0658, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 4.509394572025053, |
|
"grad_norm": 0.047781139612197876, |
|
"learning_rate": 8.827886461209114e-06, |
|
"loss": 0.0008, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 4.516986145378629, |
|
"grad_norm": 0.0009586279047653079, |
|
"learning_rate": 8.82219646607772e-06, |
|
"loss": 0.0003, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 4.524577718732207, |
|
"grad_norm": 0.07489871978759766, |
|
"learning_rate": 8.816494536618069e-06, |
|
"loss": 0.0003, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 4.532169292085785, |
|
"grad_norm": 0.015722280368208885, |
|
"learning_rate": 8.810780690633715e-06, |
|
"loss": 0.1269, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 4.539760865439362, |
|
"grad_norm": 0.01760883256793022, |
|
"learning_rate": 8.805054945965429e-06, |
|
"loss": 0.0659, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 4.54735243879294, |
|
"grad_norm": 0.03223474696278572, |
|
"learning_rate": 8.799317320491125e-06, |
|
"loss": 0.0005, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 4.5549440121465175, |
|
"grad_norm": 0.0017072842456400394, |
|
"learning_rate": 8.793567832125823e-06, |
|
"loss": 0.1485, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.562535585500095, |
|
"grad_norm": 0.0031113557051867247, |
|
"learning_rate": 8.787806498821572e-06, |
|
"loss": 0.0058, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 4.570127158853673, |
|
"grad_norm": 0.016612514853477478, |
|
"learning_rate": 8.782033338567414e-06, |
|
"loss": 0.0294, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 4.57771873220725, |
|
"grad_norm": 0.010033627972006798, |
|
"learning_rate": 8.776248369389319e-06, |
|
"loss": 0.064, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 4.585310305560828, |
|
"grad_norm": 0.007523770444095135, |
|
"learning_rate": 8.770451609350123e-06, |
|
"loss": 0.1784, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 4.592901878914405, |
|
"grad_norm": 0.0006488583167083561, |
|
"learning_rate": 8.764643076549481e-06, |
|
"loss": 0.0001, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.600493452267982, |
|
"grad_norm": 49.0224494934082, |
|
"learning_rate": 8.75882278912381e-06, |
|
"loss": 0.1479, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.60808502562156, |
|
"grad_norm": 0.05112855136394501, |
|
"learning_rate": 8.752990765246222e-06, |
|
"loss": 0.0742, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 4.615676598975138, |
|
"grad_norm": 0.007768516894429922, |
|
"learning_rate": 8.747147023126486e-06, |
|
"loss": 0.0547, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.623268172328715, |
|
"grad_norm": 0.03929920494556427, |
|
"learning_rate": 8.741291581010945e-06, |
|
"loss": 0.0005, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 4.630859745682293, |
|
"grad_norm": 0.0333462730050087, |
|
"learning_rate": 8.735424457182483e-06, |
|
"loss": 0.0912, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.6384513190358705, |
|
"grad_norm": 0.0021920499857515097, |
|
"learning_rate": 8.729545669960459e-06, |
|
"loss": 0.0025, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 4.646042892389447, |
|
"grad_norm": 0.24167831242084503, |
|
"learning_rate": 8.723655237700646e-06, |
|
"loss": 0.0184, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.653634465743025, |
|
"grad_norm": 0.01909787394106388, |
|
"learning_rate": 8.71775317879518e-06, |
|
"loss": 0.0001, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 4.6612260390966025, |
|
"grad_norm": 97.6840591430664, |
|
"learning_rate": 8.711839511672497e-06, |
|
"loss": 0.0578, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.66881761245018, |
|
"grad_norm": 0.000244935043156147, |
|
"learning_rate": 8.705914254797283e-06, |
|
"loss": 0.1423, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.676409185803758, |
|
"grad_norm": 0.0006741081597283483, |
|
"learning_rate": 8.699977426670403e-06, |
|
"loss": 0.0306, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.684000759157335, |
|
"grad_norm": 0.001535810879431665, |
|
"learning_rate": 8.69402904582886e-06, |
|
"loss": 0.0496, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 4.691592332510913, |
|
"grad_norm": 0.4821704030036926, |
|
"learning_rate": 8.688069130845725e-06, |
|
"loss": 0.0443, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.699183905864491, |
|
"grad_norm": 0.002279536332935095, |
|
"learning_rate": 8.682097700330086e-06, |
|
"loss": 0.0222, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 4.706775479218068, |
|
"grad_norm": 0.009520245715975761, |
|
"learning_rate": 8.67611477292698e-06, |
|
"loss": 0.1731, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.714367052571646, |
|
"grad_norm": 0.1851215660572052, |
|
"learning_rate": 8.67012036731735e-06, |
|
"loss": 0.0629, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 4.721958625925223, |
|
"grad_norm": 0.12576204538345337, |
|
"learning_rate": 8.664114502217975e-06, |
|
"loss": 0.0448, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.7295501992788, |
|
"grad_norm": 0.015547769144177437, |
|
"learning_rate": 8.65809719638141e-06, |
|
"loss": 0.0147, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 4.737141772632378, |
|
"grad_norm": 0.2670181095600128, |
|
"learning_rate": 8.65206846859594e-06, |
|
"loss": 0.0005, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.7447333459859555, |
|
"grad_norm": 0.028395511209964752, |
|
"learning_rate": 8.646028337685509e-06, |
|
"loss": 0.05, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.752324919339533, |
|
"grad_norm": 0.018742332234978676, |
|
"learning_rate": 8.639976822509666e-06, |
|
"loss": 0.2398, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.759916492693111, |
|
"grad_norm": 12.270938873291016, |
|
"learning_rate": 8.633913941963507e-06, |
|
"loss": 0.313, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.767508066046688, |
|
"grad_norm": 0.07293716818094254, |
|
"learning_rate": 8.627839714977618e-06, |
|
"loss": 0.0008, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.775099639400266, |
|
"grad_norm": 0.06347032636404037, |
|
"learning_rate": 8.621754160518005e-06, |
|
"loss": 0.0221, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 4.782691212753843, |
|
"grad_norm": 0.0011452403850853443, |
|
"learning_rate": 8.615657297586051e-06, |
|
"loss": 0.1013, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.79028278610742, |
|
"grad_norm": 0.0021203244104981422, |
|
"learning_rate": 8.609549145218442e-06, |
|
"loss": 0.0007, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 4.797874359460998, |
|
"grad_norm": 0.006574318744242191, |
|
"learning_rate": 8.603429722487117e-06, |
|
"loss": 0.0725, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.805465932814576, |
|
"grad_norm": 0.00014791313151363283, |
|
"learning_rate": 8.597299048499206e-06, |
|
"loss": 0.0532, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 4.813057506168153, |
|
"grad_norm": 0.12207093834877014, |
|
"learning_rate": 8.591157142396966e-06, |
|
"loss": 0.1137, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.820649079521731, |
|
"grad_norm": 0.027442127466201782, |
|
"learning_rate": 8.58500402335773e-06, |
|
"loss": 0.0812, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.828240652875309, |
|
"grad_norm": 0.00018395182269159704, |
|
"learning_rate": 8.578839710593836e-06, |
|
"loss": 0.1686, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.835832226228886, |
|
"grad_norm": 0.06821048259735107, |
|
"learning_rate": 8.57266422335258e-06, |
|
"loss": 0.0005, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 4.843423799582464, |
|
"grad_norm": 9.863347804639488e-05, |
|
"learning_rate": 8.56647758091614e-06, |
|
"loss": 0.0005, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.8510153729360415, |
|
"grad_norm": 0.0016949453856796026, |
|
"learning_rate": 8.560279802601533e-06, |
|
"loss": 0.1504, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 4.858606946289618, |
|
"grad_norm": 0.0009430780191905797, |
|
"learning_rate": 8.554070907760544e-06, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.866198519643196, |
|
"grad_norm": 0.02552955597639084, |
|
"learning_rate": 8.547850915779662e-06, |
|
"loss": 0.0001, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 4.8737900929967735, |
|
"grad_norm": 0.014719455502927303, |
|
"learning_rate": 8.541619846080039e-06, |
|
"loss": 0.15, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.881381666350351, |
|
"grad_norm": 0.09882048517465591, |
|
"learning_rate": 8.535377718117399e-06, |
|
"loss": 0.0569, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 4.888973239703929, |
|
"grad_norm": 0.22454605996608734, |
|
"learning_rate": 8.52912455138201e-06, |
|
"loss": 0.1482, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.896564813057506, |
|
"grad_norm": 0.08625132590532303, |
|
"learning_rate": 8.52286036539859e-06, |
|
"loss": 0.0011, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.904156386411084, |
|
"grad_norm": 0.03739362582564354, |
|
"learning_rate": 8.51658517972628e-06, |
|
"loss": 0.1778, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.911747959764662, |
|
"grad_norm": 0.21021807193756104, |
|
"learning_rate": 8.510299013958559e-06, |
|
"loss": 0.0011, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 4.919339533118238, |
|
"grad_norm": 0.04205634444952011, |
|
"learning_rate": 8.504001887723185e-06, |
|
"loss": 0.0787, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.926931106471816, |
|
"grad_norm": 0.09222347289323807, |
|
"learning_rate": 8.497693820682146e-06, |
|
"loss": 0.0006, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 4.934522679825394, |
|
"grad_norm": 0.1209307536482811, |
|
"learning_rate": 8.491374832531591e-06, |
|
"loss": 0.053, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.942114253178971, |
|
"grad_norm": 0.009995940141379833, |
|
"learning_rate": 8.485044943001763e-06, |
|
"loss": 0.0096, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 4.949705826532549, |
|
"grad_norm": 0.018289346247911453, |
|
"learning_rate": 8.47870417185695e-06, |
|
"loss": 0.0012, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.9572973998861265, |
|
"grad_norm": 65.71520233154297, |
|
"learning_rate": 8.472352538895411e-06, |
|
"loss": 0.1783, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 4.964888973239704, |
|
"grad_norm": 57.22151184082031, |
|
"learning_rate": 8.465990063949323e-06, |
|
"loss": 0.1034, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.972480546593282, |
|
"grad_norm": 0.003517146920785308, |
|
"learning_rate": 8.459616766884713e-06, |
|
"loss": 0.0024, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.980072119946859, |
|
"grad_norm": 0.0020259765442460775, |
|
"learning_rate": 8.453232667601403e-06, |
|
"loss": 0.0001, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.987663693300436, |
|
"grad_norm": 0.007150826510041952, |
|
"learning_rate": 8.44683778603294e-06, |
|
"loss": 0.1704, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 4.995255266654014, |
|
"grad_norm": 0.0018830208573490381, |
|
"learning_rate": 8.440432142146535e-06, |
|
"loss": 0.0349, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.99981021066616, |
|
"eval_f1": 0.9552939310725507, |
|
"eval_loss": 0.15884605050086975, |
|
"eval_precision": 0.9567644368540595, |
|
"eval_recall": 0.9552352048558422, |
|
"eval_runtime": 75.7698, |
|
"eval_samples_per_second": 17.395, |
|
"eval_steps_per_second": 17.395, |
|
"step": 6586 |
|
}, |
|
{ |
|
"epoch": 5.002846840007591, |
|
"grad_norm": 0.002661398844793439, |
|
"learning_rate": 8.434015755943013e-06, |
|
"loss": 0.0002, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 5.010438413361169, |
|
"grad_norm": 0.015321805141866207, |
|
"learning_rate": 8.427588647456727e-06, |
|
"loss": 0.0006, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 5.018029986714747, |
|
"grad_norm": 0.015539165586233139, |
|
"learning_rate": 8.42115083675552e-06, |
|
"loss": 0.128, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 5.025621560068324, |
|
"grad_norm": 5.927582263946533, |
|
"learning_rate": 8.414702343940647e-06, |
|
"loss": 0.0743, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 5.033213133421902, |
|
"grad_norm": 0.0004428077954798937, |
|
"learning_rate": 8.408243189146714e-06, |
|
"loss": 0.0764, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 5.0408047067754795, |
|
"grad_norm": 13.519503593444824, |
|
"learning_rate": 8.401773392541621e-06, |
|
"loss": 0.0837, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 5.048396280129057, |
|
"grad_norm": 0.0011204121401533484, |
|
"learning_rate": 8.395292974326497e-06, |
|
"loss": 0.0001, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 5.055987853482634, |
|
"grad_norm": 0.005702109541743994, |
|
"learning_rate": 8.388801954735632e-06, |
|
"loss": 0.0003, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 5.0635794268362115, |
|
"grad_norm": 0.009877257980406284, |
|
"learning_rate": 8.38230035403642e-06, |
|
"loss": 0.0001, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 5.071171000189789, |
|
"grad_norm": 0.0006185189704410732, |
|
"learning_rate": 8.375788192529292e-06, |
|
"loss": 0.0002, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 5.078762573543367, |
|
"grad_norm": 0.0004436051531229168, |
|
"learning_rate": 8.369265490547653e-06, |
|
"loss": 0.0004, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 5.086354146896944, |
|
"grad_norm": 0.016778159886598587, |
|
"learning_rate": 8.362732268457824e-06, |
|
"loss": 0.1505, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 5.093945720250522, |
|
"grad_norm": 0.13505133986473083, |
|
"learning_rate": 8.356188546658966e-06, |
|
"loss": 0.0825, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 5.1015372936041, |
|
"grad_norm": 0.015829697251319885, |
|
"learning_rate": 8.34963434558303e-06, |
|
"loss": 0.106, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 5.109128866957677, |
|
"grad_norm": 0.006577119696885347, |
|
"learning_rate": 8.343069685694687e-06, |
|
"loss": 0.1537, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 5.116720440311255, |
|
"grad_norm": 0.0571792870759964, |
|
"learning_rate": 8.33649458749126e-06, |
|
"loss": 0.023, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 5.124312013664832, |
|
"grad_norm": 0.13444474339485168, |
|
"learning_rate": 8.329909071502668e-06, |
|
"loss": 0.1881, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 5.131903587018409, |
|
"grad_norm": 0.011354477144777775, |
|
"learning_rate": 8.32331315829136e-06, |
|
"loss": 0.2186, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 5.139495160371987, |
|
"grad_norm": 0.11647947877645493, |
|
"learning_rate": 8.31670686845224e-06, |
|
"loss": 0.0005, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 5.147086733725565, |
|
"grad_norm": 0.03318728879094124, |
|
"learning_rate": 8.310090222612623e-06, |
|
"loss": 0.0004, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 5.154678307079142, |
|
"grad_norm": 0.0020830295979976654, |
|
"learning_rate": 8.303463241432156e-06, |
|
"loss": 0.0738, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 5.16226988043272, |
|
"grad_norm": 0.18546123802661896, |
|
"learning_rate": 8.296825945602749e-06, |
|
"loss": 0.225, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 5.1698614537862975, |
|
"grad_norm": 0.013226731680333614, |
|
"learning_rate": 8.290178355848528e-06, |
|
"loss": 0.0024, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 5.177453027139875, |
|
"grad_norm": 0.0015887143090367317, |
|
"learning_rate": 8.283520492925758e-06, |
|
"loss": 0.1161, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 5.185044600493452, |
|
"grad_norm": 12.341133117675781, |
|
"learning_rate": 8.276852377622777e-06, |
|
"loss": 0.0333, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 5.1926361738470295, |
|
"grad_norm": 0.48488712310791016, |
|
"learning_rate": 8.270174030759939e-06, |
|
"loss": 0.0025, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 5.200227747200607, |
|
"grad_norm": 0.09974020719528198, |
|
"learning_rate": 8.263485473189542e-06, |
|
"loss": 0.0003, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 5.207819320554185, |
|
"grad_norm": 0.005017921794205904, |
|
"learning_rate": 8.256786725795767e-06, |
|
"loss": 0.0707, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 5.215410893907762, |
|
"grad_norm": 16.735441207885742, |
|
"learning_rate": 8.250077809494612e-06, |
|
"loss": 0.1761, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 5.22300246726134, |
|
"grad_norm": 0.08619498461484909, |
|
"learning_rate": 8.243358745233822e-06, |
|
"loss": 0.0025, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 5.230594040614918, |
|
"grad_norm": 0.008258694782853127, |
|
"learning_rate": 8.236629553992837e-06, |
|
"loss": 0.1096, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 5.238185613968495, |
|
"grad_norm": 0.032047972083091736, |
|
"learning_rate": 8.229890256782705e-06, |
|
"loss": 0.0774, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 5.245777187322073, |
|
"grad_norm": 0.12164535373449326, |
|
"learning_rate": 8.223140874646039e-06, |
|
"loss": 0.041, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 5.25336876067565, |
|
"grad_norm": 0.30879223346710205, |
|
"learning_rate": 8.216381428656935e-06, |
|
"loss": 0.0008, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 5.260960334029227, |
|
"grad_norm": 0.011329672299325466, |
|
"learning_rate": 8.209611939920912e-06, |
|
"loss": 0.0507, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 5.268551907382805, |
|
"grad_norm": 0.0024318841751664877, |
|
"learning_rate": 8.202832429574851e-06, |
|
"loss": 0.0511, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 5.2761434807363825, |
|
"grad_norm": 0.06363888084888458, |
|
"learning_rate": 8.196042918786923e-06, |
|
"loss": 0.0418, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 5.28373505408996, |
|
"grad_norm": 0.006296386010944843, |
|
"learning_rate": 8.189243428756518e-06, |
|
"loss": 0.0013, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 5.291326627443538, |
|
"grad_norm": 1.5055712461471558, |
|
"learning_rate": 8.182433980714191e-06, |
|
"loss": 0.0003, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 5.298918200797115, |
|
"grad_norm": 0.04809055104851723, |
|
"learning_rate": 8.175614595921589e-06, |
|
"loss": 0.0001, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 5.306509774150693, |
|
"grad_norm": 0.0006017005071043968, |
|
"learning_rate": 8.168785295671385e-06, |
|
"loss": 0.0001, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 5.314101347504271, |
|
"grad_norm": 0.05823567882180214, |
|
"learning_rate": 8.161946101287205e-06, |
|
"loss": 0.1, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.321692920857847, |
|
"grad_norm": 0.21126702427864075, |
|
"learning_rate": 8.155097034123582e-06, |
|
"loss": 0.0012, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 5.329284494211425, |
|
"grad_norm": 0.005064593628048897, |
|
"learning_rate": 8.148238115565865e-06, |
|
"loss": 0.2162, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 5.336876067565003, |
|
"grad_norm": 0.03429802507162094, |
|
"learning_rate": 8.141369367030165e-06, |
|
"loss": 0.0068, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 5.34446764091858, |
|
"grad_norm": 0.019597377628087997, |
|
"learning_rate": 8.134490809963285e-06, |
|
"loss": 0.0447, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 5.352059214272158, |
|
"grad_norm": 3.237245559692383, |
|
"learning_rate": 8.127602465842656e-06, |
|
"loss": 0.0408, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 5.3596507876257355, |
|
"grad_norm": 0.1109641045331955, |
|
"learning_rate": 8.12070435617627e-06, |
|
"loss": 0.0041, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 5.367242360979313, |
|
"grad_norm": 1.6172115802764893, |
|
"learning_rate": 8.113796502502605e-06, |
|
"loss": 0.0008, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 5.374833934332891, |
|
"grad_norm": 0.0019253261853009462, |
|
"learning_rate": 8.106878926390565e-06, |
|
"loss": 0.0106, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 5.382425507686468, |
|
"grad_norm": 0.010185305029153824, |
|
"learning_rate": 8.099951649439415e-06, |
|
"loss": 0.17, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 5.390017081040045, |
|
"grad_norm": 0.00028460906469263136, |
|
"learning_rate": 8.093014693278705e-06, |
|
"loss": 0.0814, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.397608654393623, |
|
"grad_norm": 0.09348779916763306, |
|
"learning_rate": 8.08606807956821e-06, |
|
"loss": 0.0562, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.4052002277472, |
|
"grad_norm": 0.01985323429107666, |
|
"learning_rate": 8.079111829997861e-06, |
|
"loss": 0.0004, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 5.412791801100778, |
|
"grad_norm": 0.084492027759552, |
|
"learning_rate": 8.072145966287668e-06, |
|
"loss": 0.0393, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 5.420383374454356, |
|
"grad_norm": 0.008949169889092445, |
|
"learning_rate": 8.06517051018767e-06, |
|
"loss": 0.0027, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 5.427974947807933, |
|
"grad_norm": 0.010001681745052338, |
|
"learning_rate": 8.058185483477849e-06, |
|
"loss": 0.0002, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 5.435566521161511, |
|
"grad_norm": 0.00013484137889463454, |
|
"learning_rate": 8.051190907968077e-06, |
|
"loss": 0.0617, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 5.443158094515089, |
|
"grad_norm": 0.028125835582613945, |
|
"learning_rate": 8.044186805498033e-06, |
|
"loss": 0.0003, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 5.450749667868665, |
|
"grad_norm": 0.011845303699374199, |
|
"learning_rate": 8.037173197937149e-06, |
|
"loss": 0.0002, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 5.458341241222243, |
|
"grad_norm": 0.021918371319770813, |
|
"learning_rate": 8.030150107184535e-06, |
|
"loss": 0.0003, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 5.465932814575821, |
|
"grad_norm": 0.002744874684140086, |
|
"learning_rate": 8.023117555168907e-06, |
|
"loss": 0.0174, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.473524387929398, |
|
"grad_norm": 0.0008592222584411502, |
|
"learning_rate": 8.016075563848524e-06, |
|
"loss": 0.0001, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 5.481115961282976, |
|
"grad_norm": 0.0009818489197641611, |
|
"learning_rate": 8.009024155211125e-06, |
|
"loss": 0.0001, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 5.4887075346365535, |
|
"grad_norm": 0.0036790217272937298, |
|
"learning_rate": 8.001963351273843e-06, |
|
"loss": 0.0001, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 5.496299107990131, |
|
"grad_norm": 0.009668831713497639, |
|
"learning_rate": 7.994893174083151e-06, |
|
"loss": 0.0663, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 5.503890681343709, |
|
"grad_norm": 0.008087705820798874, |
|
"learning_rate": 7.98781364571479e-06, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 5.511482254697286, |
|
"grad_norm": 0.001750052673742175, |
|
"learning_rate": 7.980724788273698e-06, |
|
"loss": 0.0001, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.519073828050864, |
|
"grad_norm": 0.0040147858671844006, |
|
"learning_rate": 7.973626623893942e-06, |
|
"loss": 0.1629, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 5.526665401404441, |
|
"grad_norm": 0.005076427478343248, |
|
"learning_rate": 7.96651917473865e-06, |
|
"loss": 0.0001, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 5.534256974758018, |
|
"grad_norm": 0.022049933671951294, |
|
"learning_rate": 7.959402462999934e-06, |
|
"loss": 0.0001, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 5.541848548111596, |
|
"grad_norm": 7.288018226623535, |
|
"learning_rate": 7.952276510898838e-06, |
|
"loss": 0.0612, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.549440121465174, |
|
"grad_norm": 0.48564571142196655, |
|
"learning_rate": 7.945141340685249e-06, |
|
"loss": 0.0001, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 5.557031694818751, |
|
"grad_norm": 0.0020839564967900515, |
|
"learning_rate": 7.937996974637839e-06, |
|
"loss": 0.0002, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 5.564623268172329, |
|
"grad_norm": 0.0012567265657708049, |
|
"learning_rate": 7.930843435063996e-06, |
|
"loss": 0.0003, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 5.5722148415259065, |
|
"grad_norm": 0.0036961582954972982, |
|
"learning_rate": 7.923680744299747e-06, |
|
"loss": 0.0876, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 5.579806414879484, |
|
"grad_norm": 31.300655364990234, |
|
"learning_rate": 7.916508924709693e-06, |
|
"loss": 0.151, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 5.587397988233061, |
|
"grad_norm": 0.008196866139769554, |
|
"learning_rate": 7.909327998686942e-06, |
|
"loss": 0.0001, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 5.5949895615866385, |
|
"grad_norm": 10.782143592834473, |
|
"learning_rate": 7.902137988653032e-06, |
|
"loss": 0.0539, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 5.602581134940216, |
|
"grad_norm": 0.004750726278871298, |
|
"learning_rate": 7.894938917057866e-06, |
|
"loss": 0.0385, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 5.610172708293794, |
|
"grad_norm": 0.04581161588430405, |
|
"learning_rate": 7.887730806379641e-06, |
|
"loss": 0.2684, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 5.617764281647371, |
|
"grad_norm": 0.026009181514382362, |
|
"learning_rate": 7.880513679124777e-06, |
|
"loss": 0.1283, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.625355855000949, |
|
"grad_norm": 2.0138673782348633, |
|
"learning_rate": 7.873287557827846e-06, |
|
"loss": 0.0004, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 5.632947428354527, |
|
"grad_norm": 0.14630401134490967, |
|
"learning_rate": 7.866052465051506e-06, |
|
"loss": 0.0503, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 5.640539001708104, |
|
"grad_norm": 0.0008778591873124242, |
|
"learning_rate": 7.858808423386422e-06, |
|
"loss": 0.0032, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 5.648130575061682, |
|
"grad_norm": 0.004400940611958504, |
|
"learning_rate": 7.851555455451208e-06, |
|
"loss": 0.0002, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 5.6557221484152596, |
|
"grad_norm": 0.002588229486718774, |
|
"learning_rate": 7.844293583892341e-06, |
|
"loss": 0.0055, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 5.663313721768836, |
|
"grad_norm": 0.0016362261958420277, |
|
"learning_rate": 7.837022831384107e-06, |
|
"loss": 0.0001, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 5.670905295122414, |
|
"grad_norm": 0.006628331728279591, |
|
"learning_rate": 7.829743220628515e-06, |
|
"loss": 0.0001, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 5.6784968684759916, |
|
"grad_norm": 0.0015720854280516505, |
|
"learning_rate": 7.822454774355233e-06, |
|
"loss": 0.1205, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 5.686088441829569, |
|
"grad_norm": 0.005687546916306019, |
|
"learning_rate": 7.815157515321521e-06, |
|
"loss": 0.1584, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 5.693680015183147, |
|
"grad_norm": 0.0018359271343797445, |
|
"learning_rate": 7.807851466312152e-06, |
|
"loss": 0.0833, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.701271588536724, |
|
"grad_norm": 0.004786277189850807, |
|
"learning_rate": 7.80053665013935e-06, |
|
"loss": 0.0004, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 5.708863161890302, |
|
"grad_norm": 0.14934459328651428, |
|
"learning_rate": 7.793213089642705e-06, |
|
"loss": 0.0678, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 5.716454735243879, |
|
"grad_norm": 0.002186194993555546, |
|
"learning_rate": 7.785880807689119e-06, |
|
"loss": 0.014, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 5.7240463085974564, |
|
"grad_norm": 0.007107855286449194, |
|
"learning_rate": 7.778539827172717e-06, |
|
"loss": 0.0021, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 5.731637881951034, |
|
"grad_norm": 0.00156366394367069, |
|
"learning_rate": 7.771190171014789e-06, |
|
"loss": 0.0299, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 5.739229455304612, |
|
"grad_norm": 0.006057819351553917, |
|
"learning_rate": 7.763831862163715e-06, |
|
"loss": 0.3021, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 5.746821028658189, |
|
"grad_norm": 0.1267128884792328, |
|
"learning_rate": 7.756464923594889e-06, |
|
"loss": 0.1477, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 5.754412602011767, |
|
"grad_norm": 0.003787196008488536, |
|
"learning_rate": 7.74908937831065e-06, |
|
"loss": 0.0012, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 5.762004175365345, |
|
"grad_norm": 0.004670240916311741, |
|
"learning_rate": 7.741705249340212e-06, |
|
"loss": 0.0001, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 5.769595748718922, |
|
"grad_norm": 0.0031925721559673548, |
|
"learning_rate": 7.734312559739591e-06, |
|
"loss": 0.1256, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.7771873220725, |
|
"grad_norm": 0.05346198379993439, |
|
"learning_rate": 7.726911332591533e-06, |
|
"loss": 0.0297, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 5.7847788954260775, |
|
"grad_norm": 9.102517127990723, |
|
"learning_rate": 7.719501591005435e-06, |
|
"loss": 0.0291, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 5.792370468779654, |
|
"grad_norm": 0.012199531309306622, |
|
"learning_rate": 7.71208335811729e-06, |
|
"loss": 0.0015, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 5.799962042133232, |
|
"grad_norm": 0.0010750379879027605, |
|
"learning_rate": 7.704656657089594e-06, |
|
"loss": 0.0002, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 5.8075536154868095, |
|
"grad_norm": 0.0029223288875073195, |
|
"learning_rate": 7.697221511111289e-06, |
|
"loss": 0.0404, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.815145188840387, |
|
"grad_norm": 0.030176958069205284, |
|
"learning_rate": 7.689777943397684e-06, |
|
"loss": 0.0002, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 5.822736762193965, |
|
"grad_norm": 0.01166499499231577, |
|
"learning_rate": 7.682325977190386e-06, |
|
"loss": 0.0381, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.830328335547542, |
|
"grad_norm": 32.26509475708008, |
|
"learning_rate": 7.674865635757219e-06, |
|
"loss": 0.0993, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 5.83791990890112, |
|
"grad_norm": 19.091943740844727, |
|
"learning_rate": 7.667396942392165e-06, |
|
"loss": 0.0492, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 5.845511482254698, |
|
"grad_norm": 0.01752518303692341, |
|
"learning_rate": 7.659919920415282e-06, |
|
"loss": 0.0053, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.853103055608274, |
|
"grad_norm": 0.0013000709004700184, |
|
"learning_rate": 7.652434593172629e-06, |
|
"loss": 0.2842, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 5.860694628961852, |
|
"grad_norm": 76.4178695678711, |
|
"learning_rate": 7.6449409840362e-06, |
|
"loss": 0.019, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 5.86828620231543, |
|
"grad_norm": 49.07400894165039, |
|
"learning_rate": 7.63743911640385e-06, |
|
"loss": 0.0412, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 5.875877775669007, |
|
"grad_norm": 0.018517136573791504, |
|
"learning_rate": 7.629929013699215e-06, |
|
"loss": 0.0113, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 5.883469349022585, |
|
"grad_norm": 0.0009308361331932247, |
|
"learning_rate": 7.622410699371651e-06, |
|
"loss": 0.0975, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.8910609223761625, |
|
"grad_norm": 0.002873294521123171, |
|
"learning_rate": 7.614884196896146e-06, |
|
"loss": 0.0001, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 5.89865249572974, |
|
"grad_norm": 0.5766377449035645, |
|
"learning_rate": 7.607349529773263e-06, |
|
"loss": 0.0894, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 5.906244069083318, |
|
"grad_norm": 0.33659154176712036, |
|
"learning_rate": 7.599806721529048e-06, |
|
"loss": 0.026, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 5.913835642436895, |
|
"grad_norm": 0.06800296902656555, |
|
"learning_rate": 7.592255795714978e-06, |
|
"loss": 0.001, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 5.921427215790473, |
|
"grad_norm": 0.010890863835811615, |
|
"learning_rate": 7.5846967759078646e-06, |
|
"loss": 0.0515, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.92901878914405, |
|
"grad_norm": 0.0007496042526327074, |
|
"learning_rate": 7.577129685709802e-06, |
|
"loss": 0.0196, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 5.936610362497627, |
|
"grad_norm": 0.12547799944877625, |
|
"learning_rate": 7.569554548748076e-06, |
|
"loss": 0.0212, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 5.944201935851205, |
|
"grad_norm": 0.000410243752412498, |
|
"learning_rate": 7.561971388675101e-06, |
|
"loss": 0.0001, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 5.951793509204783, |
|
"grad_norm": 0.0626864954829216, |
|
"learning_rate": 7.554380229168341e-06, |
|
"loss": 0.1047, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 5.95938508255836, |
|
"grad_norm": 0.0048113660886883736, |
|
"learning_rate": 7.546781093930238e-06, |
|
"loss": 0.0166, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.966976655911938, |
|
"grad_norm": 0.04934828728437424, |
|
"learning_rate": 7.539174006688137e-06, |
|
"loss": 0.1765, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 5.974568229265516, |
|
"grad_norm": 3.118401527404785, |
|
"learning_rate": 7.531558991194214e-06, |
|
"loss": 0.0369, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 5.982159802619093, |
|
"grad_norm": 33.45072937011719, |
|
"learning_rate": 7.523936071225395e-06, |
|
"loss": 0.1186, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 5.98975137597267, |
|
"grad_norm": 0.09529292583465576, |
|
"learning_rate": 7.516305270583291e-06, |
|
"loss": 0.0382, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 5.997342949326248, |
|
"grad_norm": 0.09993643313646317, |
|
"learning_rate": 7.50866661309412e-06, |
|
"loss": 0.1966, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.999620421332321, |
|
"eval_f1": 0.9453778934602862, |
|
"eval_loss": 0.17724575102329254, |
|
"eval_precision": 0.9455308702748206, |
|
"eval_recall": 0.9453717754172989, |
|
"eval_runtime": 75.7194, |
|
"eval_samples_per_second": 17.406, |
|
"eval_steps_per_second": 17.406, |
|
"step": 7903 |
|
}, |
|
{ |
|
"epoch": 6.004934522679825, |
|
"grad_norm": 0.5747145414352417, |
|
"learning_rate": 7.5010201226086285e-06, |
|
"loss": 0.0792, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 6.012526096033403, |
|
"grad_norm": 15.592010498046875, |
|
"learning_rate": 7.493365823002023e-06, |
|
"loss": 0.066, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 6.0201176693869805, |
|
"grad_norm": 0.002133031841367483, |
|
"learning_rate": 7.4857037381738924e-06, |
|
"loss": 0.0001, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 6.027709242740558, |
|
"grad_norm": 0.006577716208994389, |
|
"learning_rate": 7.478033892048134e-06, |
|
"loss": 0.0005, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 6.035300816094136, |
|
"grad_norm": 0.0061035482212901115, |
|
"learning_rate": 7.470356308572879e-06, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 6.042892389447713, |
|
"grad_norm": 0.0037885792553424835, |
|
"learning_rate": 7.462671011720417e-06, |
|
"loss": 0.0001, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 6.050483962801291, |
|
"grad_norm": 0.010262789204716682, |
|
"learning_rate": 7.454978025487121e-06, |
|
"loss": 0.0007, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 6.058075536154868, |
|
"grad_norm": 0.0021226617973297834, |
|
"learning_rate": 7.447277373893373e-06, |
|
"loss": 0.0386, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 6.065667109508445, |
|
"grad_norm": 0.00850209966301918, |
|
"learning_rate": 7.439569080983493e-06, |
|
"loss": 0.0008, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 6.073258682862023, |
|
"grad_norm": 0.004618831444531679, |
|
"learning_rate": 7.431853170825658e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.080850256215601, |
|
"grad_norm": 0.0010309051722288132, |
|
"learning_rate": 7.424129667511824e-06, |
|
"loss": 0.0174, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 6.088441829569178, |
|
"grad_norm": 0.005731165409088135, |
|
"learning_rate": 7.4163985951576616e-06, |
|
"loss": 0.0099, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 6.096033402922756, |
|
"grad_norm": 2.437437057495117, |
|
"learning_rate": 7.408659977902474e-06, |
|
"loss": 0.159, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 6.1036249762763335, |
|
"grad_norm": 0.008021681569516659, |
|
"learning_rate": 7.400913839909119e-06, |
|
"loss": 0.0002, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 6.111216549629911, |
|
"grad_norm": 0.0012970505049452186, |
|
"learning_rate": 7.3931602053639414e-06, |
|
"loss": 0.0527, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 6.118808122983489, |
|
"grad_norm": 0.031485993415117264, |
|
"learning_rate": 7.385399098476691e-06, |
|
"loss": 0.0416, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 6.1263996963370655, |
|
"grad_norm": 0.037826113402843475, |
|
"learning_rate": 7.377630543480447e-06, |
|
"loss": 0.0064, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 6.133991269690643, |
|
"grad_norm": 0.007939423434436321, |
|
"learning_rate": 7.369854564631549e-06, |
|
"loss": 0.0004, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 6.141582843044221, |
|
"grad_norm": 0.011576803401112556, |
|
"learning_rate": 7.3620711862095116e-06, |
|
"loss": 0.0003, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 6.149174416397798, |
|
"grad_norm": 0.01118936575949192, |
|
"learning_rate": 7.354280432516957e-06, |
|
"loss": 0.0002, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 6.156765989751376, |
|
"grad_norm": 0.001931383740156889, |
|
"learning_rate": 7.346482327879535e-06, |
|
"loss": 0.0009, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 6.164357563104954, |
|
"grad_norm": 0.005506934132426977, |
|
"learning_rate": 7.338676896645848e-06, |
|
"loss": 0.0567, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 6.171949136458531, |
|
"grad_norm": 0.07792196422815323, |
|
"learning_rate": 7.330864163187372e-06, |
|
"loss": 0.0003, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 6.179540709812109, |
|
"grad_norm": 0.06636549532413483, |
|
"learning_rate": 7.323044151898388e-06, |
|
"loss": 0.0658, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 6.1871322831656865, |
|
"grad_norm": 0.0012724515981972218, |
|
"learning_rate": 7.3152168871959e-06, |
|
"loss": 0.0605, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 6.194723856519263, |
|
"grad_norm": 0.0033073413651436567, |
|
"learning_rate": 7.307382393519556e-06, |
|
"loss": 0.0732, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 6.202315429872841, |
|
"grad_norm": 0.00361923361197114, |
|
"learning_rate": 7.299540695331579e-06, |
|
"loss": 0.0054, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 6.2099070032264185, |
|
"grad_norm": 0.0007601641118526459, |
|
"learning_rate": 7.291691817116686e-06, |
|
"loss": 0.0001, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 6.217498576579996, |
|
"grad_norm": 0.0025373934768140316, |
|
"learning_rate": 7.283835783382015e-06, |
|
"loss": 0.0567, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 6.225090149933574, |
|
"grad_norm": 0.0037624204996973276, |
|
"learning_rate": 7.275972618657041e-06, |
|
"loss": 0.0001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 6.232681723287151, |
|
"grad_norm": 0.002659817226231098, |
|
"learning_rate": 7.268102347493511e-06, |
|
"loss": 0.0727, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 6.240273296640729, |
|
"grad_norm": 0.08516960591077805, |
|
"learning_rate": 7.260224994465357e-06, |
|
"loss": 0.001, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 6.247864869994307, |
|
"grad_norm": 0.03827419877052307, |
|
"learning_rate": 7.252340584168624e-06, |
|
"loss": 0.0023, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 6.255456443347883, |
|
"grad_norm": 0.0027726832777261734, |
|
"learning_rate": 7.2444491412213914e-06, |
|
"loss": 0.0536, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 6.263048016701461, |
|
"grad_norm": 0.0064014289528131485, |
|
"learning_rate": 7.236550690263702e-06, |
|
"loss": 0.001, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 6.270639590055039, |
|
"grad_norm": 0.005650675855576992, |
|
"learning_rate": 7.228645255957472e-06, |
|
"loss": 0.2206, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 6.278231163408616, |
|
"grad_norm": 21.262990951538086, |
|
"learning_rate": 7.2207328629864285e-06, |
|
"loss": 0.0884, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 6.285822736762194, |
|
"grad_norm": 0.03092315047979355, |
|
"learning_rate": 7.212813536056025e-06, |
|
"loss": 0.0684, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 6.293414310115772, |
|
"grad_norm": 0.00995034258812666, |
|
"learning_rate": 7.2048872998933665e-06, |
|
"loss": 0.0003, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 6.301005883469349, |
|
"grad_norm": 0.08173485100269318, |
|
"learning_rate": 7.196954179247127e-06, |
|
"loss": 0.0699, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 6.308597456822927, |
|
"grad_norm": 0.15706369280815125, |
|
"learning_rate": 7.189014198887478e-06, |
|
"loss": 0.0419, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 6.3161890301765045, |
|
"grad_norm": 0.44603389501571655, |
|
"learning_rate": 7.181067383606015e-06, |
|
"loss": 0.0374, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 6.323780603530081, |
|
"grad_norm": 89.45038604736328, |
|
"learning_rate": 7.173113758215667e-06, |
|
"loss": 0.0231, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 6.331372176883659, |
|
"grad_norm": 0.07431600242853165, |
|
"learning_rate": 7.165153347550631e-06, |
|
"loss": 0.007, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 6.3389637502372365, |
|
"grad_norm": 0.00812879391014576, |
|
"learning_rate": 7.15718617646629e-06, |
|
"loss": 0.1122, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 6.346555323590814, |
|
"grad_norm": 0.4049533009529114, |
|
"learning_rate": 7.149212269839132e-06, |
|
"loss": 0.0532, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 6.354146896944392, |
|
"grad_norm": 0.403401255607605, |
|
"learning_rate": 7.141231652566681e-06, |
|
"loss": 0.0008, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 6.361738470297969, |
|
"grad_norm": 0.8025851249694824, |
|
"learning_rate": 7.133244349567411e-06, |
|
"loss": 0.0221, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 6.369330043651547, |
|
"grad_norm": 0.06498798727989197, |
|
"learning_rate": 7.125250385780673e-06, |
|
"loss": 0.0621, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 6.376921617005125, |
|
"grad_norm": 0.0010519091738387942, |
|
"learning_rate": 7.1172497861666124e-06, |
|
"loss": 0.0404, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 6.384513190358702, |
|
"grad_norm": 0.01423695683479309, |
|
"learning_rate": 7.109242575706099e-06, |
|
"loss": 0.0314, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 6.392104763712279, |
|
"grad_norm": 0.8802148103713989, |
|
"learning_rate": 7.10122877940064e-06, |
|
"loss": 0.013, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 6.399696337065857, |
|
"grad_norm": 0.037081677466630936, |
|
"learning_rate": 7.093208422272309e-06, |
|
"loss": 0.0005, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 6.407287910419434, |
|
"grad_norm": 0.0005525704473257065, |
|
"learning_rate": 7.085181529363661e-06, |
|
"loss": 0.0972, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 6.414879483773012, |
|
"grad_norm": 0.018398938700556755, |
|
"learning_rate": 7.077148125737661e-06, |
|
"loss": 0.1108, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 6.4224710571265895, |
|
"grad_norm": 0.040173228830099106, |
|
"learning_rate": 7.069108236477604e-06, |
|
"loss": 0.0002, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 6.430062630480167, |
|
"grad_norm": 0.009616430848836899, |
|
"learning_rate": 7.061061886687035e-06, |
|
"loss": 0.0013, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 6.437654203833745, |
|
"grad_norm": 78.41429901123047, |
|
"learning_rate": 7.053009101489667e-06, |
|
"loss": 0.1232, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 6.445245777187322, |
|
"grad_norm": 0.000696105882525444, |
|
"learning_rate": 7.044949906029314e-06, |
|
"loss": 0.0066, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 6.4528373505409, |
|
"grad_norm": 0.012759624980390072, |
|
"learning_rate": 7.036884325469797e-06, |
|
"loss": 0.205, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.460428923894477, |
|
"grad_norm": 51.840309143066406, |
|
"learning_rate": 7.028812384994883e-06, |
|
"loss": 0.1227, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 6.468020497248054, |
|
"grad_norm": 0.0019890512339770794, |
|
"learning_rate": 7.0207341098081875e-06, |
|
"loss": 0.1419, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 6.475612070601632, |
|
"grad_norm": 0.003854219801723957, |
|
"learning_rate": 7.012649525133112e-06, |
|
"loss": 0.0714, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 6.48320364395521, |
|
"grad_norm": 0.06946977972984314, |
|
"learning_rate": 7.004558656212754e-06, |
|
"loss": 0.0004, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 6.490795217308787, |
|
"grad_norm": 0.003731220494955778, |
|
"learning_rate": 6.9964615283098405e-06, |
|
"loss": 0.0017, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 6.498386790662365, |
|
"grad_norm": 0.002791723469272256, |
|
"learning_rate": 6.988358166706631e-06, |
|
"loss": 0.0403, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 6.5059783640159425, |
|
"grad_norm": 4.053121089935303, |
|
"learning_rate": 6.980248596704856e-06, |
|
"loss": 0.0008, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 6.51356993736952, |
|
"grad_norm": 0.0038540286477655172, |
|
"learning_rate": 6.97213284362563e-06, |
|
"loss": 0.0003, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 6.521161510723097, |
|
"grad_norm": 0.0033889245241880417, |
|
"learning_rate": 6.96401093280937e-06, |
|
"loss": 0.0505, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 6.5287530840766745, |
|
"grad_norm": 0.0008385963155888021, |
|
"learning_rate": 6.9558828896157225e-06, |
|
"loss": 0.0001, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 6.536344657430252, |
|
"grad_norm": 0.05049284175038338, |
|
"learning_rate": 6.947748739423483e-06, |
|
"loss": 0.0776, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 6.54393623078383, |
|
"grad_norm": 0.014165320433676243, |
|
"learning_rate": 6.939608507630513e-06, |
|
"loss": 0.0339, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 6.551527804137407, |
|
"grad_norm": 24.47572898864746, |
|
"learning_rate": 6.931462219653662e-06, |
|
"loss": 0.1604, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 6.559119377490985, |
|
"grad_norm": 0.07809809595346451, |
|
"learning_rate": 6.923309900928693e-06, |
|
"loss": 0.0003, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 6.566710950844563, |
|
"grad_norm": 0.08131968230009079, |
|
"learning_rate": 6.915151576910194e-06, |
|
"loss": 0.0097, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 6.57430252419814, |
|
"grad_norm": 106.42731475830078, |
|
"learning_rate": 6.906987273071509e-06, |
|
"loss": 0.0111, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 6.581894097551718, |
|
"grad_norm": 0.0046349032782018185, |
|
"learning_rate": 6.898817014904653e-06, |
|
"loss": 0.0601, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 6.589485670905296, |
|
"grad_norm": 0.00192779372446239, |
|
"learning_rate": 6.890640827920226e-06, |
|
"loss": 0.0349, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 6.597077244258872, |
|
"grad_norm": 0.0012624857481569052, |
|
"learning_rate": 6.882458737647346e-06, |
|
"loss": 0.0009, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 6.60466881761245, |
|
"grad_norm": 0.00019073448493145406, |
|
"learning_rate": 6.874270769633564e-06, |
|
"loss": 0.0001, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 6.612260390966028, |
|
"grad_norm": 0.03901955857872963, |
|
"learning_rate": 6.866076949444781e-06, |
|
"loss": 0.238, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 6.619851964319605, |
|
"grad_norm": 0.05632855370640755, |
|
"learning_rate": 6.857877302665169e-06, |
|
"loss": 0.0435, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 6.627443537673183, |
|
"grad_norm": 0.01720161736011505, |
|
"learning_rate": 6.8496718548970956e-06, |
|
"loss": 0.0208, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 6.6350351110267605, |
|
"grad_norm": 0.00398442754521966, |
|
"learning_rate": 6.8414606317610435e-06, |
|
"loss": 0.0012, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 6.642626684380338, |
|
"grad_norm": 0.02426181733608246, |
|
"learning_rate": 6.833243658895521e-06, |
|
"loss": 0.0004, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 6.650218257733916, |
|
"grad_norm": 14.350150108337402, |
|
"learning_rate": 6.825020961956995e-06, |
|
"loss": 0.0823, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 6.6578098310874925, |
|
"grad_norm": 0.0016744782915338874, |
|
"learning_rate": 6.816792566619805e-06, |
|
"loss": 0.1436, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 6.66540140444107, |
|
"grad_norm": 0.020618196576833725, |
|
"learning_rate": 6.808558498576081e-06, |
|
"loss": 0.0006, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 6.672992977794648, |
|
"grad_norm": 0.13271041214466095, |
|
"learning_rate": 6.800318783535665e-06, |
|
"loss": 0.0074, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 6.680584551148225, |
|
"grad_norm": 0.020608441904187202, |
|
"learning_rate": 6.792073447226034e-06, |
|
"loss": 0.0002, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 6.688176124501803, |
|
"grad_norm": 0.0014845712576061487, |
|
"learning_rate": 6.7838225153922125e-06, |
|
"loss": 0.0004, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 6.695767697855381, |
|
"grad_norm": 0.06566622108221054, |
|
"learning_rate": 6.775566013796699e-06, |
|
"loss": 0.055, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 6.703359271208958, |
|
"grad_norm": 0.13233526051044464, |
|
"learning_rate": 6.767303968219383e-06, |
|
"loss": 0.051, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 6.710950844562536, |
|
"grad_norm": 12.247241020202637, |
|
"learning_rate": 6.759036404457465e-06, |
|
"loss": 0.171, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 6.7185424179161135, |
|
"grad_norm": 0.06808517873287201, |
|
"learning_rate": 6.750763348325371e-06, |
|
"loss": 0.1818, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 6.726133991269691, |
|
"grad_norm": 0.011621583253145218, |
|
"learning_rate": 6.7424848256546825e-06, |
|
"loss": 0.0119, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 6.733725564623268, |
|
"grad_norm": 22.450834274291992, |
|
"learning_rate": 6.734200862294045e-06, |
|
"loss": 0.176, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 6.7413171379768455, |
|
"grad_norm": 11.976455688476562, |
|
"learning_rate": 6.725911484109094e-06, |
|
"loss": 0.0507, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 6.748908711330423, |
|
"grad_norm": 0.042554713785648346, |
|
"learning_rate": 6.717616716982369e-06, |
|
"loss": 0.0004, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 6.756500284684001, |
|
"grad_norm": 0.0029066246934235096, |
|
"learning_rate": 6.7093165868132415e-06, |
|
"loss": 0.0066, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 6.764091858037578, |
|
"grad_norm": 0.31371551752090454, |
|
"learning_rate": 6.701011119517824e-06, |
|
"loss": 0.0311, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 6.771683431391156, |
|
"grad_norm": 0.025408325716853142, |
|
"learning_rate": 6.692700341028893e-06, |
|
"loss": 0.0002, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 6.779275004744734, |
|
"grad_norm": 0.6896237730979919, |
|
"learning_rate": 6.684384277295813e-06, |
|
"loss": 0.003, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 6.78686657809831, |
|
"grad_norm": 0.0014387418050318956, |
|
"learning_rate": 6.676062954284447e-06, |
|
"loss": 0.1432, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 6.794458151451888, |
|
"grad_norm": 0.012326021678745747, |
|
"learning_rate": 6.667736397977079e-06, |
|
"loss": 0.0131, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 6.802049724805466, |
|
"grad_norm": 0.010481426492333412, |
|
"learning_rate": 6.659404634372338e-06, |
|
"loss": 0.0027, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 6.809641298159043, |
|
"grad_norm": 0.11520393937826157, |
|
"learning_rate": 6.6510676894851065e-06, |
|
"loss": 0.0008, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 6.817232871512621, |
|
"grad_norm": 14.105742454528809, |
|
"learning_rate": 6.6427255893464495e-06, |
|
"loss": 0.1792, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 6.8248244448661985, |
|
"grad_norm": 0.012812143191695213, |
|
"learning_rate": 6.634378360003525e-06, |
|
"loss": 0.0001, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 6.832416018219776, |
|
"grad_norm": 0.0041709113866090775, |
|
"learning_rate": 6.62602602751951e-06, |
|
"loss": 0.0001, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.840007591573354, |
|
"grad_norm": 0.0038161997217684984, |
|
"learning_rate": 6.6176686179735095e-06, |
|
"loss": 0.0665, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 6.847599164926931, |
|
"grad_norm": 0.30405986309051514, |
|
"learning_rate": 6.6093061574604875e-06, |
|
"loss": 0.0624, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 6.855190738280509, |
|
"grad_norm": 0.001419481704942882, |
|
"learning_rate": 6.600938672091178e-06, |
|
"loss": 0.0001, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 6.862782311634086, |
|
"grad_norm": 0.005425265524536371, |
|
"learning_rate": 6.592566187992e-06, |
|
"loss": 0.0115, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 6.870373884987663, |
|
"grad_norm": 0.009964833967387676, |
|
"learning_rate": 6.584188731304984e-06, |
|
"loss": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 6.877965458341241, |
|
"grad_norm": 17.450939178466797, |
|
"learning_rate": 6.575806328187684e-06, |
|
"loss": 0.0065, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 6.885557031694819, |
|
"grad_norm": 0.5963069796562195, |
|
"learning_rate": 6.567419004813105e-06, |
|
"loss": 0.162, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 6.893148605048396, |
|
"grad_norm": 0.002563629997894168, |
|
"learning_rate": 6.559026787369608e-06, |
|
"loss": 0.0006, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 6.900740178401974, |
|
"grad_norm": 0.0032906217966228724, |
|
"learning_rate": 6.550629702060836e-06, |
|
"loss": 0.0576, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 6.908331751755552, |
|
"grad_norm": 0.00252812379039824, |
|
"learning_rate": 6.542227775105636e-06, |
|
"loss": 0.0003, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.915923325109129, |
|
"grad_norm": 0.13027949631214142, |
|
"learning_rate": 6.533821032737968e-06, |
|
"loss": 0.1393, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 6.923514898462706, |
|
"grad_norm": 0.0013868529349565506, |
|
"learning_rate": 6.525409501206828e-06, |
|
"loss": 0.0003, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 6.931106471816284, |
|
"grad_norm": 0.0035531616304069757, |
|
"learning_rate": 6.516993206776167e-06, |
|
"loss": 0.0516, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 6.938698045169861, |
|
"grad_norm": 0.02282761037349701, |
|
"learning_rate": 6.508572175724809e-06, |
|
"loss": 0.0811, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 6.946289618523439, |
|
"grad_norm": 29.90252685546875, |
|
"learning_rate": 6.500146434346363e-06, |
|
"loss": 0.065, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.9538811918770165, |
|
"grad_norm": 0.14673539996147156, |
|
"learning_rate": 6.4917160089491475e-06, |
|
"loss": 0.0004, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 6.961472765230594, |
|
"grad_norm": 2.630889892578125, |
|
"learning_rate": 6.483280925856108e-06, |
|
"loss": 0.039, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 6.969064338584172, |
|
"grad_norm": 0.005536849144846201, |
|
"learning_rate": 6.474841211404732e-06, |
|
"loss": 0.0212, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 6.976655911937749, |
|
"grad_norm": 13.343396186828613, |
|
"learning_rate": 6.466396891946967e-06, |
|
"loss": 0.1344, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 6.984247485291327, |
|
"grad_norm": 13.473750114440918, |
|
"learning_rate": 6.457947993849138e-06, |
|
"loss": 0.0461, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.991839058644905, |
|
"grad_norm": 0.10873476415872574, |
|
"learning_rate": 6.4494945434918695e-06, |
|
"loss": 0.0008, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 6.999430631998481, |
|
"grad_norm": 0.08516258746385574, |
|
"learning_rate": 6.441036567269999e-06, |
|
"loss": 0.0006, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 6.999430631998481, |
|
"eval_f1": 0.9574758853469025, |
|
"eval_loss": 0.15473049879074097, |
|
"eval_precision": 0.9565893515212521, |
|
"eval_recall": 0.9575113808801214, |
|
"eval_runtime": 75.8291, |
|
"eval_samples_per_second": 17.381, |
|
"eval_steps_per_second": 17.381, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 7.007022205352059, |
|
"grad_norm": 0.4523492455482483, |
|
"learning_rate": 6.432574091592495e-06, |
|
"loss": 0.0557, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 7.014613778705637, |
|
"grad_norm": 0.03357968479394913, |
|
"learning_rate": 6.424107142882371e-06, |
|
"loss": 0.0008, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 7.022205352059214, |
|
"grad_norm": 46.4831657409668, |
|
"learning_rate": 6.415635747576613e-06, |
|
"loss": 0.0078, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 7.029796925412792, |
|
"grad_norm": 0.010737122967839241, |
|
"learning_rate": 6.40715993212609e-06, |
|
"loss": 0.0003, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 7.0373884987663695, |
|
"grad_norm": 21.10315704345703, |
|
"learning_rate": 6.398679722995468e-06, |
|
"loss": 0.1309, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 7.044980072119947, |
|
"grad_norm": 0.010574131272733212, |
|
"learning_rate": 6.3901951466631355e-06, |
|
"loss": 0.0138, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 7.052571645473525, |
|
"grad_norm": 0.0182713121175766, |
|
"learning_rate": 6.381706229621117e-06, |
|
"loss": 0.0002, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 7.0601632188271015, |
|
"grad_norm": 0.10783802717924118, |
|
"learning_rate": 6.373212998374989e-06, |
|
"loss": 0.0337, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 7.067754792180679, |
|
"grad_norm": 0.001446128822863102, |
|
"learning_rate": 6.364715479443798e-06, |
|
"loss": 0.0007, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 7.075346365534257, |
|
"grad_norm": 0.00694943917915225, |
|
"learning_rate": 6.356213699359982e-06, |
|
"loss": 0.071, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 7.082937938887834, |
|
"grad_norm": 0.09859494864940643, |
|
"learning_rate": 6.347707684669278e-06, |
|
"loss": 0.0005, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 7.090529512241412, |
|
"grad_norm": 0.0008273068233393133, |
|
"learning_rate": 6.33919746193065e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 7.09812108559499, |
|
"grad_norm": 0.0038316529244184494, |
|
"learning_rate": 6.330683057716198e-06, |
|
"loss": 0.0002, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 7.105712658948567, |
|
"grad_norm": 0.0030708136036992073, |
|
"learning_rate": 6.322164498611081e-06, |
|
"loss": 0.0444, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 7.113304232302145, |
|
"grad_norm": 0.0017414516769349575, |
|
"learning_rate": 6.313641811213429e-06, |
|
"loss": 0.0001, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 7.1208958056557226, |
|
"grad_norm": 0.0035761166363954544, |
|
"learning_rate": 6.305115022134262e-06, |
|
"loss": 0.0001, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 7.128487379009299, |
|
"grad_norm": 0.006457789335399866, |
|
"learning_rate": 6.296584157997408e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 7.136078952362877, |
|
"grad_norm": 0.002314153825864196, |
|
"learning_rate": 6.288049245439419e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 7.1436705257164546, |
|
"grad_norm": 0.008694717660546303, |
|
"learning_rate": 6.279510311109487e-06, |
|
"loss": 0.0001, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 7.151262099070032, |
|
"grad_norm": 0.0009509180672466755, |
|
"learning_rate": 6.270967381669362e-06, |
|
"loss": 0.0001, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 7.15885367242361, |
|
"grad_norm": 0.009006676264107227, |
|
"learning_rate": 6.262420483793267e-06, |
|
"loss": 0.0605, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 7.1664452457771874, |
|
"grad_norm": 0.048271872103214264, |
|
"learning_rate": 6.253869644167816e-06, |
|
"loss": 0.3191, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 7.174036819130765, |
|
"grad_norm": 0.001320886891335249, |
|
"learning_rate": 6.245314889491933e-06, |
|
"loss": 0.0066, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 7.181628392484343, |
|
"grad_norm": 0.4799332916736603, |
|
"learning_rate": 6.236756246476765e-06, |
|
"loss": 0.0261, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 7.18921996583792, |
|
"grad_norm": 1.649972677230835, |
|
"learning_rate": 6.228193741845598e-06, |
|
"loss": 0.001, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 7.196811539191497, |
|
"grad_norm": 0.001544089405797422, |
|
"learning_rate": 6.219627402333779e-06, |
|
"loss": 0.0001, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 7.204403112545075, |
|
"grad_norm": 0.0058356523513793945, |
|
"learning_rate": 6.211057254688625e-06, |
|
"loss": 0.0005, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 7.211994685898652, |
|
"grad_norm": 0.001541537931188941, |
|
"learning_rate": 6.202483325669345e-06, |
|
"loss": 0.0001, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.21958625925223, |
|
"grad_norm": 0.002176716923713684, |
|
"learning_rate": 6.193905642046957e-06, |
|
"loss": 0.0472, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 7.227177832605808, |
|
"grad_norm": 1.9937260150909424, |
|
"learning_rate": 6.1853242306041995e-06, |
|
"loss": 0.1573, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 7.234769405959385, |
|
"grad_norm": 0.005575578194111586, |
|
"learning_rate": 6.176739118135451e-06, |
|
"loss": 0.004, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 7.242360979312963, |
|
"grad_norm": 0.000481792347272858, |
|
"learning_rate": 6.168150331446647e-06, |
|
"loss": 0.0001, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 7.2499525526665405, |
|
"grad_norm": 17.994680404663086, |
|
"learning_rate": 6.159557897355198e-06, |
|
"loss": 0.1026, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 7.257544126020118, |
|
"grad_norm": 0.002096704440191388, |
|
"learning_rate": 6.1509618426898934e-06, |
|
"loss": 0.0004, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 7.265135699373695, |
|
"grad_norm": 0.8841345906257629, |
|
"learning_rate": 6.142362194290839e-06, |
|
"loss": 0.0017, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 0.0021465634927153587, |
|
"learning_rate": 6.133758979009355e-06, |
|
"loss": 0.0001, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 7.28031884608085, |
|
"grad_norm": 0.000766513985581696, |
|
"learning_rate": 6.1251522237078996e-06, |
|
"loss": 0.2186, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 7.287910419434428, |
|
"grad_norm": 0.01812721975147724, |
|
"learning_rate": 6.116541955259986e-06, |
|
"loss": 0.0007, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 7.295501992788005, |
|
"grad_norm": 0.00034479115856811404, |
|
"learning_rate": 6.1079282005500965e-06, |
|
"loss": 0.0055, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 7.303093566141583, |
|
"grad_norm": 0.0008322893991135061, |
|
"learning_rate": 6.099310986473595e-06, |
|
"loss": 0.1915, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 7.310685139495161, |
|
"grad_norm": 0.001017669215798378, |
|
"learning_rate": 6.090690339936651e-06, |
|
"loss": 0.0001, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 7.318276712848738, |
|
"grad_norm": 0.003790239803493023, |
|
"learning_rate": 6.082066287856152e-06, |
|
"loss": 0.0001, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 7.325868286202315, |
|
"grad_norm": 0.001801560982130468, |
|
"learning_rate": 6.073438857159617e-06, |
|
"loss": 0.0404, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 7.333459859555893, |
|
"grad_norm": 0.0027911756187677383, |
|
"learning_rate": 6.064808074785112e-06, |
|
"loss": 0.0215, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 7.34105143290947, |
|
"grad_norm": 0.001065615564584732, |
|
"learning_rate": 6.056173967681172e-06, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 7.348643006263048, |
|
"grad_norm": 0.0008436132338829339, |
|
"learning_rate": 6.047536562806712e-06, |
|
"loss": 0.0001, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 7.3562345796166255, |
|
"grad_norm": 0.8050636053085327, |
|
"learning_rate": 6.038895887130942e-06, |
|
"loss": 0.068, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 7.363826152970203, |
|
"grad_norm": 0.011237557046115398, |
|
"learning_rate": 6.030251967633288e-06, |
|
"loss": 0.0001, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 7.371417726323781, |
|
"grad_norm": 0.0008242133189924061, |
|
"learning_rate": 6.021604831303303e-06, |
|
"loss": 0.0963, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 7.379009299677358, |
|
"grad_norm": 0.9633244276046753, |
|
"learning_rate": 6.012954505140582e-06, |
|
"loss": 0.0032, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 7.386600873030936, |
|
"grad_norm": 33.064613342285156, |
|
"learning_rate": 6.004301016154683e-06, |
|
"loss": 0.0926, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 7.394192446384513, |
|
"grad_norm": 0.010244650766253471, |
|
"learning_rate": 5.995644391365038e-06, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 7.40178401973809, |
|
"grad_norm": 0.0010498914634808898, |
|
"learning_rate": 5.98698465780087e-06, |
|
"loss": 0.2515, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 7.409375593091668, |
|
"grad_norm": 0.005540487356483936, |
|
"learning_rate": 5.978321842501108e-06, |
|
"loss": 0.0001, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 7.416967166445246, |
|
"grad_norm": 0.001410833327099681, |
|
"learning_rate": 5.9696559725143054e-06, |
|
"loss": 0.0024, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 7.424558739798823, |
|
"grad_norm": 0.11642355471849442, |
|
"learning_rate": 5.960987074898553e-06, |
|
"loss": 0.0004, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 7.432150313152401, |
|
"grad_norm": 0.029217828065156937, |
|
"learning_rate": 5.952315176721395e-06, |
|
"loss": 0.0002, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 7.439741886505979, |
|
"grad_norm": 0.057612184435129166, |
|
"learning_rate": 5.943640305059742e-06, |
|
"loss": 0.0455, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 7.447333459859556, |
|
"grad_norm": 30.20539665222168, |
|
"learning_rate": 5.9349624869997915e-06, |
|
"loss": 0.152, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 7.454925033213134, |
|
"grad_norm": 0.011167285032570362, |
|
"learning_rate": 5.926281749636941e-06, |
|
"loss": 0.0013, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 7.462516606566711, |
|
"grad_norm": 0.01445252075791359, |
|
"learning_rate": 5.9175981200757026e-06, |
|
"loss": 0.0275, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 7.470108179920288, |
|
"grad_norm": 0.0006470708176493645, |
|
"learning_rate": 5.908911625429617e-06, |
|
"loss": 0.0004, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 7.477699753273866, |
|
"grad_norm": 0.010150356218218803, |
|
"learning_rate": 5.900222292821173e-06, |
|
"loss": 0.0572, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 7.4852913266274435, |
|
"grad_norm": 0.05601394549012184, |
|
"learning_rate": 5.89153014938172e-06, |
|
"loss": 0.0004, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 7.492882899981021, |
|
"grad_norm": 0.007213375996798277, |
|
"learning_rate": 5.8828352222513866e-06, |
|
"loss": 0.0184, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 7.500474473334599, |
|
"grad_norm": 0.005943207535892725, |
|
"learning_rate": 5.874137538578984e-06, |
|
"loss": 0.0519, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 7.508066046688176, |
|
"grad_norm": 0.005052383989095688, |
|
"learning_rate": 5.865437125521943e-06, |
|
"loss": 0.091, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 7.515657620041754, |
|
"grad_norm": 0.000759047397878021, |
|
"learning_rate": 5.856734010246207e-06, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 7.523249193395332, |
|
"grad_norm": 0.004873152356594801, |
|
"learning_rate": 5.848028219926162e-06, |
|
"loss": 0.0001, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 7.530840766748908, |
|
"grad_norm": 0.0005250478279776871, |
|
"learning_rate": 5.839319781744543e-06, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 7.538432340102486, |
|
"grad_norm": 0.0007055936730466783, |
|
"learning_rate": 5.830608722892352e-06, |
|
"loss": 0.0001, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 7.546023913456064, |
|
"grad_norm": 0.0024068867787718773, |
|
"learning_rate": 5.821895070568781e-06, |
|
"loss": 0.0001, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 7.553615486809641, |
|
"grad_norm": 183.14315795898438, |
|
"learning_rate": 5.813178851981112e-06, |
|
"loss": 0.1222, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 7.561207060163219, |
|
"grad_norm": 0.8877391219139099, |
|
"learning_rate": 5.804460094344642e-06, |
|
"loss": 0.0002, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 7.5687986335167965, |
|
"grad_norm": 0.006915534846484661, |
|
"learning_rate": 5.795738824882596e-06, |
|
"loss": 0.0001, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 7.576390206870374, |
|
"grad_norm": 9.879432678222656, |
|
"learning_rate": 5.787015070826044e-06, |
|
"loss": 0.0076, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 7.583981780223952, |
|
"grad_norm": 0.004392684902995825, |
|
"learning_rate": 5.77828885941381e-06, |
|
"loss": 0.0001, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 7.5915733535775285, |
|
"grad_norm": 0.000951431633438915, |
|
"learning_rate": 5.769560217892395e-06, |
|
"loss": 0.0002, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.599164926931106, |
|
"grad_norm": 0.0021181986667215824, |
|
"learning_rate": 5.760829173515883e-06, |
|
"loss": 0.0002, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 7.606756500284684, |
|
"grad_norm": 0.006260419264435768, |
|
"learning_rate": 5.752095753545864e-06, |
|
"loss": 0.0577, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 7.614348073638261, |
|
"grad_norm": 0.0006751982145942748, |
|
"learning_rate": 5.743359985251348e-06, |
|
"loss": 0.0292, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 7.621939646991839, |
|
"grad_norm": 0.00024200859479606152, |
|
"learning_rate": 5.734621895908668e-06, |
|
"loss": 0.0238, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 7.629531220345417, |
|
"grad_norm": 0.002035447396337986, |
|
"learning_rate": 5.725881512801413e-06, |
|
"loss": 0.0002, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 7.637122793698994, |
|
"grad_norm": 0.0007019038312137127, |
|
"learning_rate": 5.717138863220333e-06, |
|
"loss": 0.0982, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 7.644714367052572, |
|
"grad_norm": 0.0009322810219600797, |
|
"learning_rate": 5.7083939744632514e-06, |
|
"loss": 0.0001, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 7.6523059404061495, |
|
"grad_norm": 0.011389588937163353, |
|
"learning_rate": 5.699646873834983e-06, |
|
"loss": 0.0691, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 7.659897513759727, |
|
"grad_norm": 0.01710079051554203, |
|
"learning_rate": 5.690897588647253e-06, |
|
"loss": 0.0005, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 7.667489087113304, |
|
"grad_norm": 9.926609992980957, |
|
"learning_rate": 5.6821461462186045e-06, |
|
"loss": 0.0352, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 7.6750806604668815, |
|
"grad_norm": 0.00021108197688590735, |
|
"learning_rate": 5.673392573874316e-06, |
|
"loss": 0.0005, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 7.682672233820459, |
|
"grad_norm": 0.001629292848519981, |
|
"learning_rate": 5.6646368989463185e-06, |
|
"loss": 0.0479, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 7.690263807174037, |
|
"grad_norm": 0.12789593636989594, |
|
"learning_rate": 5.655879148773107e-06, |
|
"loss": 0.0183, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 7.697855380527614, |
|
"grad_norm": 0.001387747353874147, |
|
"learning_rate": 5.647119350699655e-06, |
|
"loss": 0.0116, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 7.705446953881192, |
|
"grad_norm": 0.0015600691549479961, |
|
"learning_rate": 5.638357532077331e-06, |
|
"loss": 0.0316, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 7.71303852723477, |
|
"grad_norm": 0.0008326400420628488, |
|
"learning_rate": 5.629593720263816e-06, |
|
"loss": 0.0002, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 7.720630100588347, |
|
"grad_norm": 0.023590516299009323, |
|
"learning_rate": 5.620827942623008e-06, |
|
"loss": 0.0008, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 7.728221673941924, |
|
"grad_norm": 0.000754083099309355, |
|
"learning_rate": 5.612060226524948e-06, |
|
"loss": 0.0365, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 7.735813247295502, |
|
"grad_norm": 0.011727853678166866, |
|
"learning_rate": 5.603290599345726e-06, |
|
"loss": 0.0438, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 7.743404820649079, |
|
"grad_norm": 0.20062032341957092, |
|
"learning_rate": 5.5945190884674065e-06, |
|
"loss": 0.0056, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 7.750996394002657, |
|
"grad_norm": 0.30250805616378784, |
|
"learning_rate": 5.585745721277923e-06, |
|
"loss": 0.1501, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 7.758587967356235, |
|
"grad_norm": 0.00017410292639397085, |
|
"learning_rate": 5.5769705251710175e-06, |
|
"loss": 0.0002, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 7.766179540709812, |
|
"grad_norm": 0.011902794241905212, |
|
"learning_rate": 5.568193527546135e-06, |
|
"loss": 0.0001, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 7.77377111406339, |
|
"grad_norm": 0.3667079508304596, |
|
"learning_rate": 5.559414755808348e-06, |
|
"loss": 0.0394, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 7.7813626874169675, |
|
"grad_norm": 0.001953916857019067, |
|
"learning_rate": 5.550634237368269e-06, |
|
"loss": 0.0006, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 7.788954260770545, |
|
"grad_norm": 0.0013212488265708089, |
|
"learning_rate": 5.541851999641964e-06, |
|
"loss": 0.0004, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 7.796545834124123, |
|
"grad_norm": 0.00039594716508872807, |
|
"learning_rate": 5.533068070050867e-06, |
|
"loss": 0.0322, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 7.8041374074776995, |
|
"grad_norm": 0.000754969718400389, |
|
"learning_rate": 5.524282476021692e-06, |
|
"loss": 0.1497, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 7.811728980831277, |
|
"grad_norm": 0.035513028502464294, |
|
"learning_rate": 5.515495244986356e-06, |
|
"loss": 0.0081, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 7.819320554184855, |
|
"grad_norm": 0.0016785170882940292, |
|
"learning_rate": 5.5067064043818815e-06, |
|
"loss": 0.0001, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 7.826912127538432, |
|
"grad_norm": 8.234527194872499e-05, |
|
"learning_rate": 5.49791598165032e-06, |
|
"loss": 0.0001, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 7.83450370089201, |
|
"grad_norm": 0.0006789985345676541, |
|
"learning_rate": 5.489124004238662e-06, |
|
"loss": 0.0393, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 7.842095274245588, |
|
"grad_norm": 0.0023299374151974916, |
|
"learning_rate": 5.480330499598754e-06, |
|
"loss": 0.0046, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 7.849686847599165, |
|
"grad_norm": 0.007388091180473566, |
|
"learning_rate": 5.471535495187207e-06, |
|
"loss": 0.0001, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 7.857278420952742, |
|
"grad_norm": 0.00018302824173588306, |
|
"learning_rate": 5.462739018465318e-06, |
|
"loss": 0.1768, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 7.86486999430632, |
|
"grad_norm": 0.00418035127222538, |
|
"learning_rate": 5.45394109689898e-06, |
|
"loss": 0.0165, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 7.872461567659897, |
|
"grad_norm": 0.01187161449342966, |
|
"learning_rate": 5.445141757958599e-06, |
|
"loss": 0.0004, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 7.880053141013475, |
|
"grad_norm": 0.08545250445604324, |
|
"learning_rate": 5.436341029119004e-06, |
|
"loss": 0.06, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 7.8876447143670525, |
|
"grad_norm": 0.004683859180659056, |
|
"learning_rate": 5.427538937859368e-06, |
|
"loss": 0.2187, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 7.89523628772063, |
|
"grad_norm": 0.0011295732110738754, |
|
"learning_rate": 5.418735511663112e-06, |
|
"loss": 0.0002, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.902827861074208, |
|
"grad_norm": 0.0021211670245975256, |
|
"learning_rate": 5.409930778017828e-06, |
|
"loss": 0.0425, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 7.910419434427785, |
|
"grad_norm": 0.0004632298951037228, |
|
"learning_rate": 5.401124764415192e-06, |
|
"loss": 0.0001, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 7.918011007781363, |
|
"grad_norm": 0.03465382754802704, |
|
"learning_rate": 5.392317498350876e-06, |
|
"loss": 0.07, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 7.925602581134941, |
|
"grad_norm": 0.0012545166537165642, |
|
"learning_rate": 5.38350900732446e-06, |
|
"loss": 0.0003, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 7.933194154488517, |
|
"grad_norm": 0.0008013694896362722, |
|
"learning_rate": 5.374699318839352e-06, |
|
"loss": 0.0001, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 7.940785727842095, |
|
"grad_norm": 0.01796998642385006, |
|
"learning_rate": 5.365888460402695e-06, |
|
"loss": 0.0001, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 7.948377301195673, |
|
"grad_norm": 0.06785059720277786, |
|
"learning_rate": 5.357076459525291e-06, |
|
"loss": 0.0002, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 7.95596887454925, |
|
"grad_norm": 0.001381418784148991, |
|
"learning_rate": 5.348263343721503e-06, |
|
"loss": 0.0001, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 7.963560447902828, |
|
"grad_norm": 0.06072179973125458, |
|
"learning_rate": 5.339449140509179e-06, |
|
"loss": 0.0002, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 7.9711520212564055, |
|
"grad_norm": 0.024496397003531456, |
|
"learning_rate": 5.330633877409561e-06, |
|
"loss": 0.1215, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.978743594609983, |
|
"grad_norm": 0.0315159372985363, |
|
"learning_rate": 5.3218175819472e-06, |
|
"loss": 0.0001, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 7.986335167963561, |
|
"grad_norm": 0.0015373720088973641, |
|
"learning_rate": 5.313000281649872e-06, |
|
"loss": 0.0003, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 7.9939267413171375, |
|
"grad_norm": 0.12398699671030045, |
|
"learning_rate": 5.304182004048488e-06, |
|
"loss": 0.0002, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9590113159486987, |
|
"eval_loss": 0.15769141912460327, |
|
"eval_precision": 0.9585736334342291, |
|
"eval_recall": 0.9590288315629742, |
|
"eval_runtime": 75.7332, |
|
"eval_samples_per_second": 17.403, |
|
"eval_steps_per_second": 17.403, |
|
"step": 10538 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 19755, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.701261509159456e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|