{ "best_metric": 0.9590113159486987, "best_model_checkpoint": "output_classification_1280/hazard/checkpoint-10538", "epoch": 8.0, "eval_steps": 500, "global_step": 10538, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007591573353577529, "grad_norm": 27.469635009765625, "learning_rate": 5.0607287449392715e-08, "loss": 1.6903, "step": 10 }, { "epoch": 0.015183146707155058, "grad_norm": 29.22759437561035, "learning_rate": 9.109311740890688e-08, "loss": 1.6631, "step": 20 }, { "epoch": 0.022774720060732587, "grad_norm": 22.48965835571289, "learning_rate": 1.417004048582996e-07, "loss": 1.7504, "step": 30 }, { "epoch": 0.030366293414310117, "grad_norm": 30.96166229248047, "learning_rate": 1.9230769230769234e-07, "loss": 1.7496, "step": 40 }, { "epoch": 0.03795786676788764, "grad_norm": 28.63855743408203, "learning_rate": 2.4291497975708504e-07, "loss": 1.6787, "step": 50 }, { "epoch": 0.045549440121465175, "grad_norm": 31.33084487915039, "learning_rate": 2.9352226720647774e-07, "loss": 1.7747, "step": 60 }, { "epoch": 0.0531410134750427, "grad_norm": 27.18292236328125, "learning_rate": 3.390688259109312e-07, "loss": 1.6577, "step": 70 }, { "epoch": 0.06073258682862023, "grad_norm": 30.794124603271484, "learning_rate": 3.896761133603239e-07, "loss": 1.7097, "step": 80 }, { "epoch": 0.06832416018219777, "grad_norm": 42.49530792236328, "learning_rate": 4.402834008097166e-07, "loss": 1.5986, "step": 90 }, { "epoch": 0.07591573353577528, "grad_norm": 26.470556259155273, "learning_rate": 4.908906882591093e-07, "loss": 1.7342, "step": 100 }, { "epoch": 0.08350730688935282, "grad_norm": 40.713924407958984, "learning_rate": 5.414979757085021e-07, "loss": 1.517, "step": 110 }, { "epoch": 0.09109888024293035, "grad_norm": 53.97127914428711, "learning_rate": 5.921052631578947e-07, "loss": 1.3995, "step": 120 }, { "epoch": 0.09869045359650788, "grad_norm": 45.6757698059082, "learning_rate": 6.427125506072875e-07, "loss": 1.2737, "step": 130 }, { "epoch": 0.1062820269500854, "grad_norm": 35.03736114501953, "learning_rate": 6.933198380566802e-07, "loss": 1.3719, "step": 140 }, { "epoch": 0.11387360030366293, "grad_norm": 26.410057067871094, "learning_rate": 7.388663967611337e-07, "loss": 1.1505, "step": 150 }, { "epoch": 0.12146517365724047, "grad_norm": 30.611797332763672, "learning_rate": 7.844129554655872e-07, "loss": 1.3579, "step": 160 }, { "epoch": 0.12905674701081798, "grad_norm": 36.64908981323242, "learning_rate": 8.350202429149798e-07, "loss": 1.2164, "step": 170 }, { "epoch": 0.13664832036439553, "grad_norm": 47.913612365722656, "learning_rate": 8.805668016194332e-07, "loss": 1.1154, "step": 180 }, { "epoch": 0.14423989371797305, "grad_norm": 71.07138061523438, "learning_rate": 9.31174089068826e-07, "loss": 1.2263, "step": 190 }, { "epoch": 0.15183146707155057, "grad_norm": 46.60552978515625, "learning_rate": 9.817813765182186e-07, "loss": 1.3512, "step": 200 }, { "epoch": 0.15942304042512812, "grad_norm": 39.1867561340332, "learning_rate": 1.0323886639676114e-06, "loss": 1.095, "step": 210 }, { "epoch": 0.16701461377870563, "grad_norm": 48.71131896972656, "learning_rate": 1.0829959514170041e-06, "loss": 1.401, "step": 220 }, { "epoch": 0.17460618713228315, "grad_norm": 46.5413703918457, "learning_rate": 1.133603238866397e-06, "loss": 1.4863, "step": 230 }, { "epoch": 0.1821977604858607, "grad_norm": 45.00301742553711, "learning_rate": 1.1842105263157894e-06, "loss": 1.0144, "step": 240 }, { "epoch": 0.18978933383943822, "grad_norm": 66.17977905273438, "learning_rate": 1.2348178137651822e-06, "loss": 1.2741, "step": 250 }, { "epoch": 0.19738090719301576, "grad_norm": 71.00930786132812, "learning_rate": 1.285425101214575e-06, "loss": 1.3536, "step": 260 }, { "epoch": 0.20497248054659328, "grad_norm": 66.67515563964844, "learning_rate": 1.336032388663968e-06, "loss": 1.247, "step": 270 }, { "epoch": 0.2125640539001708, "grad_norm": 47.43987274169922, "learning_rate": 1.3866396761133605e-06, "loss": 1.2843, "step": 280 }, { "epoch": 0.22015562725374835, "grad_norm": 41.783695220947266, "learning_rate": 1.4372469635627532e-06, "loss": 1.401, "step": 290 }, { "epoch": 0.22774720060732587, "grad_norm": 61.59716796875, "learning_rate": 1.4878542510121458e-06, "loss": 1.134, "step": 300 }, { "epoch": 0.23533877396090339, "grad_norm": 52.884761810302734, "learning_rate": 1.5384615384615387e-06, "loss": 1.2276, "step": 310 }, { "epoch": 0.24293034731448093, "grad_norm": 43.76587677001953, "learning_rate": 1.5890688259109313e-06, "loss": 1.1747, "step": 320 }, { "epoch": 0.25052192066805845, "grad_norm": 24.984729766845703, "learning_rate": 1.639676113360324e-06, "loss": 1.2557, "step": 330 }, { "epoch": 0.25811349402163597, "grad_norm": 32.03645324707031, "learning_rate": 1.6902834008097168e-06, "loss": 1.0106, "step": 340 }, { "epoch": 0.2657050673752135, "grad_norm": 37.198177337646484, "learning_rate": 1.7408906882591095e-06, "loss": 1.0211, "step": 350 }, { "epoch": 0.27329664072879106, "grad_norm": 46.07294464111328, "learning_rate": 1.791497975708502e-06, "loss": 1.019, "step": 360 }, { "epoch": 0.2808882140823686, "grad_norm": 61.57015609741211, "learning_rate": 1.8370445344129556e-06, "loss": 1.3087, "step": 370 }, { "epoch": 0.2884797874359461, "grad_norm": 37.366268157958984, "learning_rate": 1.8876518218623483e-06, "loss": 0.9793, "step": 380 }, { "epoch": 0.2960713607895236, "grad_norm": 25.108686447143555, "learning_rate": 1.938259109311741e-06, "loss": 1.2809, "step": 390 }, { "epoch": 0.30366293414310114, "grad_norm": 41.831172943115234, "learning_rate": 1.988866396761134e-06, "loss": 1.04, "step": 400 }, { "epoch": 0.3112545074966787, "grad_norm": 53.10079574584961, "learning_rate": 2.0394736842105266e-06, "loss": 1.1906, "step": 410 }, { "epoch": 0.31884608085025623, "grad_norm": 38.19053649902344, "learning_rate": 2.090080971659919e-06, "loss": 1.1724, "step": 420 }, { "epoch": 0.32643765420383375, "grad_norm": 43.842498779296875, "learning_rate": 2.140688259109312e-06, "loss": 1.1657, "step": 430 }, { "epoch": 0.33402922755741127, "grad_norm": 54.60807418823242, "learning_rate": 2.1912955465587044e-06, "loss": 0.9103, "step": 440 }, { "epoch": 0.3416208009109888, "grad_norm": 48.880218505859375, "learning_rate": 2.241902834008097e-06, "loss": 1.1537, "step": 450 }, { "epoch": 0.3492123742645663, "grad_norm": 40.26908493041992, "learning_rate": 2.2925101214574904e-06, "loss": 1.0753, "step": 460 }, { "epoch": 0.3568039476181439, "grad_norm": 65.76298522949219, "learning_rate": 2.3431174089068827e-06, "loss": 0.9305, "step": 470 }, { "epoch": 0.3643955209717214, "grad_norm": 33.159881591796875, "learning_rate": 2.3937246963562755e-06, "loss": 0.9382, "step": 480 }, { "epoch": 0.3719870943252989, "grad_norm": 32.02263259887695, "learning_rate": 2.4443319838056682e-06, "loss": 0.8098, "step": 490 }, { "epoch": 0.37957866767887644, "grad_norm": 50.231842041015625, "learning_rate": 2.494939271255061e-06, "loss": 1.0712, "step": 500 }, { "epoch": 0.38717024103245395, "grad_norm": 54.17763137817383, "learning_rate": 2.5455465587044537e-06, "loss": 0.9814, "step": 510 }, { "epoch": 0.39476181438603153, "grad_norm": 25.580745697021484, "learning_rate": 2.5961538461538465e-06, "loss": 0.5809, "step": 520 }, { "epoch": 0.40235338773960905, "grad_norm": 5.449360370635986, "learning_rate": 2.646761133603239e-06, "loss": 0.5567, "step": 530 }, { "epoch": 0.40994496109318657, "grad_norm": 29.534494400024414, "learning_rate": 2.697368421052632e-06, "loss": 0.5823, "step": 540 }, { "epoch": 0.4175365344467641, "grad_norm": 13.788243293762207, "learning_rate": 2.7479757085020247e-06, "loss": 0.9266, "step": 550 }, { "epoch": 0.4251281078003416, "grad_norm": 32.08829879760742, "learning_rate": 2.798582995951417e-06, "loss": 0.432, "step": 560 }, { "epoch": 0.4327196811539191, "grad_norm": 12.410125732421875, "learning_rate": 2.8491902834008103e-06, "loss": 0.7482, "step": 570 }, { "epoch": 0.4403112545074967, "grad_norm": 24.522109985351562, "learning_rate": 2.8997975708502026e-06, "loss": 0.504, "step": 580 }, { "epoch": 0.4479028278610742, "grad_norm": 17.949840545654297, "learning_rate": 2.9504048582995953e-06, "loss": 0.6836, "step": 590 }, { "epoch": 0.45549440121465173, "grad_norm": 13.618581771850586, "learning_rate": 3.001012145748988e-06, "loss": 0.6873, "step": 600 }, { "epoch": 0.46308597456822925, "grad_norm": 50.62519454956055, "learning_rate": 3.0516194331983804e-06, "loss": 0.4781, "step": 610 }, { "epoch": 0.47067754792180677, "grad_norm": 27.868289947509766, "learning_rate": 3.1022267206477736e-06, "loss": 0.7148, "step": 620 }, { "epoch": 0.47826912127538435, "grad_norm": 30.8429012298584, "learning_rate": 3.1528340080971664e-06, "loss": 0.591, "step": 630 }, { "epoch": 0.48586069462896186, "grad_norm": 51.042518615722656, "learning_rate": 3.2034412955465587e-06, "loss": 0.5481, "step": 640 }, { "epoch": 0.4934522679825394, "grad_norm": 42.53914260864258, "learning_rate": 3.254048582995952e-06, "loss": 0.6404, "step": 650 }, { "epoch": 0.5010438413361169, "grad_norm": 28.016672134399414, "learning_rate": 3.3046558704453446e-06, "loss": 1.075, "step": 660 }, { "epoch": 0.5086354146896944, "grad_norm": 26.764345169067383, "learning_rate": 3.355263157894737e-06, "loss": 0.5689, "step": 670 }, { "epoch": 0.5162269880432719, "grad_norm": 10.721156120300293, "learning_rate": 3.40587044534413e-06, "loss": 0.302, "step": 680 }, { "epoch": 0.5238185613968495, "grad_norm": 33.98798751831055, "learning_rate": 3.4564777327935225e-06, "loss": 0.3699, "step": 690 }, { "epoch": 0.531410134750427, "grad_norm": 98.7930908203125, "learning_rate": 3.5070850202429152e-06, "loss": 0.5585, "step": 700 }, { "epoch": 0.5390017081040046, "grad_norm": 17.008193969726562, "learning_rate": 3.557692307692308e-06, "loss": 0.5513, "step": 710 }, { "epoch": 0.5465932814575821, "grad_norm": 0.9657185077667236, "learning_rate": 3.6082995951417003e-06, "loss": 0.3778, "step": 720 }, { "epoch": 0.5541848548111596, "grad_norm": 22.920196533203125, "learning_rate": 3.6589068825910935e-06, "loss": 0.2108, "step": 730 }, { "epoch": 0.5617764281647372, "grad_norm": 24.24422264099121, "learning_rate": 3.7095141700404862e-06, "loss": 0.774, "step": 740 }, { "epoch": 0.5693680015183147, "grad_norm": 10.006725311279297, "learning_rate": 3.7601214574898786e-06, "loss": 0.3806, "step": 750 }, { "epoch": 0.5769595748718922, "grad_norm": 25.408447265625, "learning_rate": 3.8107287449392717e-06, "loss": 0.3539, "step": 760 }, { "epoch": 0.5845511482254697, "grad_norm": 1.4603581428527832, "learning_rate": 3.8613360323886645e-06, "loss": 0.2608, "step": 770 }, { "epoch": 0.5921427215790472, "grad_norm": 16.798980712890625, "learning_rate": 3.911943319838057e-06, "loss": 0.3287, "step": 780 }, { "epoch": 0.5997342949326248, "grad_norm": 11.706854820251465, "learning_rate": 3.96255060728745e-06, "loss": 0.5302, "step": 790 }, { "epoch": 0.6073258682862023, "grad_norm": 20.42545509338379, "learning_rate": 4.013157894736842e-06, "loss": 0.489, "step": 800 }, { "epoch": 0.6149174416397798, "grad_norm": 44.284629821777344, "learning_rate": 4.063765182186235e-06, "loss": 0.4183, "step": 810 }, { "epoch": 0.6225090149933574, "grad_norm": 35.91806411743164, "learning_rate": 4.114372469635628e-06, "loss": 0.6323, "step": 820 }, { "epoch": 0.6301005883469349, "grad_norm": 5.10564661026001, "learning_rate": 4.16497975708502e-06, "loss": 0.2137, "step": 830 }, { "epoch": 0.6376921617005125, "grad_norm": 14.327881813049316, "learning_rate": 4.215587044534413e-06, "loss": 0.1283, "step": 840 }, { "epoch": 0.64528373505409, "grad_norm": 0.4119018018245697, "learning_rate": 4.2661943319838065e-06, "loss": 0.5361, "step": 850 }, { "epoch": 0.6528753084076675, "grad_norm": 0.33248305320739746, "learning_rate": 4.316801619433199e-06, "loss": 0.3669, "step": 860 }, { "epoch": 0.660466881761245, "grad_norm": 0.44110462069511414, "learning_rate": 4.367408906882591e-06, "loss": 0.2979, "step": 870 }, { "epoch": 0.6680584551148225, "grad_norm": 0.34030860662460327, "learning_rate": 4.418016194331984e-06, "loss": 0.2611, "step": 880 }, { "epoch": 0.6756500284684, "grad_norm": 46.19267272949219, "learning_rate": 4.468623481781377e-06, "loss": 0.2948, "step": 890 }, { "epoch": 0.6832416018219776, "grad_norm": 33.486717224121094, "learning_rate": 4.51923076923077e-06, "loss": 0.2593, "step": 900 }, { "epoch": 0.6908331751755551, "grad_norm": 43.04954528808594, "learning_rate": 4.569838056680162e-06, "loss": 0.2791, "step": 910 }, { "epoch": 0.6984247485291326, "grad_norm": 57.075809478759766, "learning_rate": 4.6204453441295545e-06, "loss": 0.2198, "step": 920 }, { "epoch": 0.7060163218827102, "grad_norm": 52.269168853759766, "learning_rate": 4.671052631578948e-06, "loss": 0.4377, "step": 930 }, { "epoch": 0.7136078952362878, "grad_norm": 0.06886545568704605, "learning_rate": 4.72165991902834e-06, "loss": 0.1961, "step": 940 }, { "epoch": 0.7211994685898653, "grad_norm": 41.10899353027344, "learning_rate": 4.772267206477733e-06, "loss": 0.4847, "step": 950 }, { "epoch": 0.7287910419434428, "grad_norm": 2.2750415802001953, "learning_rate": 4.822874493927126e-06, "loss": 0.2632, "step": 960 }, { "epoch": 0.7363826152970203, "grad_norm": 0.374896764755249, "learning_rate": 4.873481781376519e-06, "loss": 0.2243, "step": 970 }, { "epoch": 0.7439741886505978, "grad_norm": 29.88395118713379, "learning_rate": 4.924089068825911e-06, "loss": 0.256, "step": 980 }, { "epoch": 0.7515657620041754, "grad_norm": 48.7998161315918, "learning_rate": 4.974696356275304e-06, "loss": 0.2969, "step": 990 }, { "epoch": 0.7591573353577529, "grad_norm": 33.62395095825195, "learning_rate": 5.025303643724697e-06, "loss": 0.4137, "step": 1000 }, { "epoch": 0.7667489087113304, "grad_norm": 37.788795471191406, "learning_rate": 5.07591093117409e-06, "loss": 0.2332, "step": 1010 }, { "epoch": 0.7743404820649079, "grad_norm": 0.14387387037277222, "learning_rate": 5.126518218623482e-06, "loss": 0.2919, "step": 1020 }, { "epoch": 0.7819320554184854, "grad_norm": 5.84027099609375, "learning_rate": 5.177125506072875e-06, "loss": 0.2009, "step": 1030 }, { "epoch": 0.7895236287720631, "grad_norm": 0.04207382723689079, "learning_rate": 5.227732793522268e-06, "loss": 0.2878, "step": 1040 }, { "epoch": 0.7971152021256406, "grad_norm": 45.870460510253906, "learning_rate": 5.27834008097166e-06, "loss": 0.2449, "step": 1050 }, { "epoch": 0.8047067754792181, "grad_norm": 51.27897262573242, "learning_rate": 5.328947368421054e-06, "loss": 0.2529, "step": 1060 }, { "epoch": 0.8122983488327956, "grad_norm": 0.5692594051361084, "learning_rate": 5.379554655870446e-06, "loss": 0.6134, "step": 1070 }, { "epoch": 0.8198899221863731, "grad_norm": 0.09678292274475098, "learning_rate": 5.430161943319839e-06, "loss": 0.2444, "step": 1080 }, { "epoch": 0.8274814955399507, "grad_norm": 1.1001735925674438, "learning_rate": 5.480769230769232e-06, "loss": 0.2606, "step": 1090 }, { "epoch": 0.8350730688935282, "grad_norm": 0.183668851852417, "learning_rate": 5.531376518218624e-06, "loss": 0.4214, "step": 1100 }, { "epoch": 0.8426646422471057, "grad_norm": 1.1151483058929443, "learning_rate": 5.5819838056680164e-06, "loss": 0.1742, "step": 1110 }, { "epoch": 0.8502562156006832, "grad_norm": 4.986824989318848, "learning_rate": 5.632591093117409e-06, "loss": 0.2275, "step": 1120 }, { "epoch": 0.8578477889542607, "grad_norm": 40.5273323059082, "learning_rate": 5.683198380566802e-06, "loss": 0.2895, "step": 1130 }, { "epoch": 0.8654393623078382, "grad_norm": 27.60036849975586, "learning_rate": 5.733805668016194e-06, "loss": 0.1973, "step": 1140 }, { "epoch": 0.8730309356614159, "grad_norm": 0.3474140763282776, "learning_rate": 5.784412955465587e-06, "loss": 0.2508, "step": 1150 }, { "epoch": 0.8806225090149934, "grad_norm": 41.10483169555664, "learning_rate": 5.835020242914981e-06, "loss": 0.1403, "step": 1160 }, { "epoch": 0.8882140823685709, "grad_norm": 38.43809509277344, "learning_rate": 5.885627530364373e-06, "loss": 0.1639, "step": 1170 }, { "epoch": 0.8958056557221484, "grad_norm": 0.1039985790848732, "learning_rate": 5.936234817813765e-06, "loss": 0.3821, "step": 1180 }, { "epoch": 0.903397229075726, "grad_norm": 7.511643409729004, "learning_rate": 5.9868421052631585e-06, "loss": 0.3217, "step": 1190 }, { "epoch": 0.9109888024293035, "grad_norm": 1.0779646635055542, "learning_rate": 6.037449392712551e-06, "loss": 0.306, "step": 1200 }, { "epoch": 0.918580375782881, "grad_norm": 36.478607177734375, "learning_rate": 6.088056680161943e-06, "loss": 0.2311, "step": 1210 }, { "epoch": 0.9261719491364585, "grad_norm": 5.484299659729004, "learning_rate": 6.138663967611337e-06, "loss": 0.1957, "step": 1220 }, { "epoch": 0.933763522490036, "grad_norm": 36.05448532104492, "learning_rate": 6.1892712550607295e-06, "loss": 0.1806, "step": 1230 }, { "epoch": 0.9413550958436135, "grad_norm": 0.11734521389007568, "learning_rate": 6.239878542510122e-06, "loss": 0.1755, "step": 1240 }, { "epoch": 0.9489466691971912, "grad_norm": 29.589279174804688, "learning_rate": 6.290485829959515e-06, "loss": 0.1282, "step": 1250 }, { "epoch": 0.9565382425507687, "grad_norm": 0.0918528214097023, "learning_rate": 6.341093117408907e-06, "loss": 0.1178, "step": 1260 }, { "epoch": 0.9641298159043462, "grad_norm": 23.708993911743164, "learning_rate": 6.3917004048583e-06, "loss": 0.4097, "step": 1270 }, { "epoch": 0.9717213892579237, "grad_norm": 34.241607666015625, "learning_rate": 6.442307692307693e-06, "loss": 0.25, "step": 1280 }, { "epoch": 0.9793129626115012, "grad_norm": 3.067420482635498, "learning_rate": 6.492914979757086e-06, "loss": 0.201, "step": 1290 }, { "epoch": 0.9869045359650788, "grad_norm": 20.88066291809082, "learning_rate": 6.543522267206478e-06, "loss": 0.2371, "step": 1300 }, { "epoch": 0.9944961093186563, "grad_norm": 463.48541259765625, "learning_rate": 6.5941295546558715e-06, "loss": 0.436, "step": 1310 }, { "epoch": 0.9998102106661606, "eval_f1": 0.8680898011188399, "eval_loss": 0.3059525787830353, "eval_precision": 0.8342809981821465, "eval_recall": 0.8694992412746586, "eval_runtime": 75.6444, "eval_samples_per_second": 17.424, "eval_steps_per_second": 17.424, "step": 1317 }, { "epoch": 1.0020876826722338, "grad_norm": 49.61616897583008, "learning_rate": 6.644736842105264e-06, "loss": 0.0671, "step": 1320 }, { "epoch": 1.0096792560258114, "grad_norm": 0.10375616699457169, "learning_rate": 6.695344129554656e-06, "loss": 0.3414, "step": 1330 }, { "epoch": 1.0172708293793888, "grad_norm": 1.4322081804275513, "learning_rate": 6.745951417004049e-06, "loss": 0.4417, "step": 1340 }, { "epoch": 1.0248624027329665, "grad_norm": 33.353389739990234, "learning_rate": 6.796558704453442e-06, "loss": 0.2982, "step": 1350 }, { "epoch": 1.0324539760865439, "grad_norm": 0.03437357768416405, "learning_rate": 6.847165991902834e-06, "loss": 0.1854, "step": 1360 }, { "epoch": 1.0400455494401215, "grad_norm": 25.882795333862305, "learning_rate": 6.897773279352226e-06, "loss": 0.2332, "step": 1370 }, { "epoch": 1.047637122793699, "grad_norm": 0.06532780081033707, "learning_rate": 6.94838056680162e-06, "loss": 0.0215, "step": 1380 }, { "epoch": 1.0552286961472765, "grad_norm": 32.39247131347656, "learning_rate": 6.998987854251013e-06, "loss": 0.072, "step": 1390 }, { "epoch": 1.062820269500854, "grad_norm": 0.027906494215130806, "learning_rate": 7.049595141700405e-06, "loss": 0.2047, "step": 1400 }, { "epoch": 1.0704118428544316, "grad_norm": 0.6285625100135803, "learning_rate": 7.100202429149798e-06, "loss": 0.1842, "step": 1410 }, { "epoch": 1.0780034162080092, "grad_norm": 45.90504837036133, "learning_rate": 7.1508097165991906e-06, "loss": 0.3873, "step": 1420 }, { "epoch": 1.0855949895615866, "grad_norm": 0.1192048192024231, "learning_rate": 7.201417004048583e-06, "loss": 0.0922, "step": 1430 }, { "epoch": 1.0931865629151643, "grad_norm": 0.01541421003639698, "learning_rate": 7.252024291497977e-06, "loss": 0.2405, "step": 1440 }, { "epoch": 1.1007781362687417, "grad_norm": 1.1560391187667847, "learning_rate": 7.302631578947369e-06, "loss": 0.127, "step": 1450 }, { "epoch": 1.1083697096223193, "grad_norm": 0.03278697654604912, "learning_rate": 7.353238866396762e-06, "loss": 0.1782, "step": 1460 }, { "epoch": 1.1159612829758967, "grad_norm": 0.019922947511076927, "learning_rate": 7.403846153846155e-06, "loss": 0.0464, "step": 1470 }, { "epoch": 1.1235528563294743, "grad_norm": 0.06464574486017227, "learning_rate": 7.454453441295547e-06, "loss": 0.1965, "step": 1480 }, { "epoch": 1.1311444296830517, "grad_norm": 3.4782345294952393, "learning_rate": 7.505060728744939e-06, "loss": 0.2716, "step": 1490 }, { "epoch": 1.1387360030366294, "grad_norm": 11.709443092346191, "learning_rate": 7.555668016194333e-06, "loss": 0.0861, "step": 1500 }, { "epoch": 1.146327576390207, "grad_norm": 0.06341992318630219, "learning_rate": 7.606275303643725e-06, "loss": 0.2737, "step": 1510 }, { "epoch": 1.1539191497437844, "grad_norm": 0.41259318590164185, "learning_rate": 7.656882591093118e-06, "loss": 0.0553, "step": 1520 }, { "epoch": 1.1615107230973618, "grad_norm": 0.2754802107810974, "learning_rate": 7.70748987854251e-06, "loss": 0.1928, "step": 1530 }, { "epoch": 1.1691022964509394, "grad_norm": 35.12890625, "learning_rate": 7.758097165991903e-06, "loss": 0.2975, "step": 1540 }, { "epoch": 1.176693869804517, "grad_norm": 0.01568063162267208, "learning_rate": 7.808704453441295e-06, "loss": 0.0757, "step": 1550 }, { "epoch": 1.1842854431580945, "grad_norm": 63.980228424072266, "learning_rate": 7.859311740890689e-06, "loss": 0.2807, "step": 1560 }, { "epoch": 1.191877016511672, "grad_norm": 0.28402331471443176, "learning_rate": 7.909919028340081e-06, "loss": 0.0808, "step": 1570 }, { "epoch": 1.1994685898652495, "grad_norm": 0.028258422389626503, "learning_rate": 7.960526315789474e-06, "loss": 0.1397, "step": 1580 }, { "epoch": 1.2070601632188271, "grad_norm": 3.0772080421447754, "learning_rate": 8.011133603238868e-06, "loss": 0.0761, "step": 1590 }, { "epoch": 1.2146517365724046, "grad_norm": 0.37224826216697693, "learning_rate": 8.06174089068826e-06, "loss": 0.2265, "step": 1600 }, { "epoch": 1.2222433099259822, "grad_norm": 0.02686912938952446, "learning_rate": 8.112348178137652e-06, "loss": 0.0467, "step": 1610 }, { "epoch": 1.2298348832795596, "grad_norm": 0.040963444858789444, "learning_rate": 8.162955465587045e-06, "loss": 0.3815, "step": 1620 }, { "epoch": 1.2374264566331372, "grad_norm": 15.119370460510254, "learning_rate": 8.213562753036439e-06, "loss": 0.1005, "step": 1630 }, { "epoch": 1.2450180299867148, "grad_norm": 35.875064849853516, "learning_rate": 8.264170040485831e-06, "loss": 0.3051, "step": 1640 }, { "epoch": 1.2526096033402923, "grad_norm": 70.46387481689453, "learning_rate": 8.314777327935223e-06, "loss": 0.175, "step": 1650 }, { "epoch": 1.2602011766938699, "grad_norm": 0.02256329357624054, "learning_rate": 8.365384615384616e-06, "loss": 0.2415, "step": 1660 }, { "epoch": 1.2677927500474473, "grad_norm": 35.11568069458008, "learning_rate": 8.415991902834008e-06, "loss": 0.2629, "step": 1670 }, { "epoch": 1.275384323401025, "grad_norm": 71.48613739013672, "learning_rate": 8.4665991902834e-06, "loss": 0.4151, "step": 1680 }, { "epoch": 1.2829758967546023, "grad_norm": 78.90449523925781, "learning_rate": 8.517206477732795e-06, "loss": 0.1292, "step": 1690 }, { "epoch": 1.29056747010818, "grad_norm": 31.373775482177734, "learning_rate": 8.567813765182187e-06, "loss": 0.26, "step": 1700 }, { "epoch": 1.2981590434617574, "grad_norm": 0.049251481890678406, "learning_rate": 8.61842105263158e-06, "loss": 0.4099, "step": 1710 }, { "epoch": 1.305750616815335, "grad_norm": 26.275672912597656, "learning_rate": 8.669028340080973e-06, "loss": 0.1674, "step": 1720 }, { "epoch": 1.3133421901689126, "grad_norm": 56.808570861816406, "learning_rate": 8.719635627530366e-06, "loss": 0.2071, "step": 1730 }, { "epoch": 1.32093376352249, "grad_norm": 12.969684600830078, "learning_rate": 8.770242914979758e-06, "loss": 0.3515, "step": 1740 }, { "epoch": 1.3285253368760674, "grad_norm": 0.2686771750450134, "learning_rate": 8.82085020242915e-06, "loss": 0.128, "step": 1750 }, { "epoch": 1.336116910229645, "grad_norm": 0.012039333581924438, "learning_rate": 8.871457489878543e-06, "loss": 0.1058, "step": 1760 }, { "epoch": 1.3437084835832227, "grad_norm": 20.223878860473633, "learning_rate": 8.922064777327935e-06, "loss": 0.196, "step": 1770 }, { "epoch": 1.3513000569368, "grad_norm": 0.014049122110009193, "learning_rate": 8.972672064777329e-06, "loss": 0.3733, "step": 1780 }, { "epoch": 1.3588916302903777, "grad_norm": 42.03798294067383, "learning_rate": 9.023279352226721e-06, "loss": 0.1683, "step": 1790 }, { "epoch": 1.3664832036439551, "grad_norm": 0.044906727969646454, "learning_rate": 9.073886639676114e-06, "loss": 0.2116, "step": 1800 }, { "epoch": 1.3740747769975328, "grad_norm": 33.70309829711914, "learning_rate": 9.124493927125508e-06, "loss": 0.3049, "step": 1810 }, { "epoch": 1.3816663503511104, "grad_norm": 8.82701301574707, "learning_rate": 9.1751012145749e-06, "loss": 0.0822, "step": 1820 }, { "epoch": 1.3892579237046878, "grad_norm": 2.3878729343414307, "learning_rate": 9.225708502024292e-06, "loss": 0.0592, "step": 1830 }, { "epoch": 1.3968494970582652, "grad_norm": 0.0016124140238389373, "learning_rate": 9.276315789473686e-06, "loss": 0.1208, "step": 1840 }, { "epoch": 1.4044410704118429, "grad_norm": 0.13426095247268677, "learning_rate": 9.326923076923079e-06, "loss": 0.2488, "step": 1850 }, { "epoch": 1.4120326437654205, "grad_norm": 95.8023681640625, "learning_rate": 9.377530364372471e-06, "loss": 0.3505, "step": 1860 }, { "epoch": 1.4196242171189979, "grad_norm": 0.10023036599159241, "learning_rate": 9.428137651821863e-06, "loss": 0.2593, "step": 1870 }, { "epoch": 1.4272157904725755, "grad_norm": 0.0036512434016913176, "learning_rate": 9.478744939271256e-06, "loss": 0.1653, "step": 1880 }, { "epoch": 1.434807363826153, "grad_norm": 0.11651404201984406, "learning_rate": 9.529352226720648e-06, "loss": 0.1281, "step": 1890 }, { "epoch": 1.4423989371797306, "grad_norm": 99.45907592773438, "learning_rate": 9.57995951417004e-06, "loss": 0.2001, "step": 1900 }, { "epoch": 1.449990510533308, "grad_norm": 0.42387983202934265, "learning_rate": 9.630566801619434e-06, "loss": 0.2895, "step": 1910 }, { "epoch": 1.4575820838868856, "grad_norm": 155.79856872558594, "learning_rate": 9.681174089068827e-06, "loss": 0.2749, "step": 1920 }, { "epoch": 1.465173657240463, "grad_norm": 0.036998867988586426, "learning_rate": 9.731781376518219e-06, "loss": 0.3386, "step": 1930 }, { "epoch": 1.4727652305940406, "grad_norm": 20.147798538208008, "learning_rate": 9.782388663967613e-06, "loss": 0.0259, "step": 1940 }, { "epoch": 1.4803568039476183, "grad_norm": 0.6697649955749512, "learning_rate": 9.832995951417005e-06, "loss": 0.0671, "step": 1950 }, { "epoch": 1.4879483773011957, "grad_norm": 34.21855545043945, "learning_rate": 9.883603238866398e-06, "loss": 0.4116, "step": 1960 }, { "epoch": 1.495539950654773, "grad_norm": 55.607818603515625, "learning_rate": 9.93421052631579e-06, "loss": 0.2809, "step": 1970 }, { "epoch": 1.5031315240083507, "grad_norm": 7.255304336547852, "learning_rate": 9.984817813765182e-06, "loss": 0.2086, "step": 1980 }, { "epoch": 1.5107230973619283, "grad_norm": 0.03336051478981972, "learning_rate": 9.999996175090899e-06, "loss": 0.0513, "step": 1990 }, { "epoch": 1.5183146707155057, "grad_norm": 0.016688983887434006, "learning_rate": 9.999977440856317e-06, "loss": 0.1644, "step": 2000 }, { "epoch": 1.5259062440690834, "grad_norm": 25.093719482421875, "learning_rate": 9.999943094820354e-06, "loss": 0.2127, "step": 2010 }, { "epoch": 1.5334978174226608, "grad_norm": 28.240819931030273, "learning_rate": 9.999893137090254e-06, "loss": 0.2039, "step": 2020 }, { "epoch": 1.5410893907762384, "grad_norm": 0.2675958275794983, "learning_rate": 9.999827567822e-06, "loss": 0.1192, "step": 2030 }, { "epoch": 1.548680964129816, "grad_norm": 0.0035021628718823195, "learning_rate": 9.999746387220327e-06, "loss": 0.4307, "step": 2040 }, { "epoch": 1.5562725374833934, "grad_norm": 45.449134826660156, "learning_rate": 9.999649595538705e-06, "loss": 0.1564, "step": 2050 }, { "epoch": 1.5638641108369709, "grad_norm": 28.17760467529297, "learning_rate": 9.999537193079362e-06, "loss": 0.3947, "step": 2060 }, { "epoch": 1.5714556841905485, "grad_norm": 0.08233608305454254, "learning_rate": 9.999409180193255e-06, "loss": 0.2997, "step": 2070 }, { "epoch": 1.5790472575441261, "grad_norm": 0.010642267763614655, "learning_rate": 9.99926555728009e-06, "loss": 0.0658, "step": 2080 }, { "epoch": 1.5866388308977035, "grad_norm": 33.69260787963867, "learning_rate": 9.999106324788313e-06, "loss": 0.2578, "step": 2090 }, { "epoch": 1.594230404251281, "grad_norm": 35.530982971191406, "learning_rate": 9.998931483215103e-06, "loss": 0.0085, "step": 2100 }, { "epoch": 1.6018219776048586, "grad_norm": 0.02198372408747673, "learning_rate": 9.998741033106385e-06, "loss": 0.1038, "step": 2110 }, { "epoch": 1.6094135509584362, "grad_norm": 3.9551048278808594, "learning_rate": 9.998534975056814e-06, "loss": 0.1167, "step": 2120 }, { "epoch": 1.6170051243120138, "grad_norm": 1.1452088356018066, "learning_rate": 9.998313309709782e-06, "loss": 0.1636, "step": 2130 }, { "epoch": 1.6245966976655912, "grad_norm": 45.56749725341797, "learning_rate": 9.998076037757408e-06, "loss": 0.2347, "step": 2140 }, { "epoch": 1.6321882710191686, "grad_norm": 0.002319494029507041, "learning_rate": 9.997823159940545e-06, "loss": 0.0795, "step": 2150 }, { "epoch": 1.6397798443727463, "grad_norm": 0.028734903782606125, "learning_rate": 9.997554677048776e-06, "loss": 0.2305, "step": 2160 }, { "epoch": 1.647371417726324, "grad_norm": 0.004517258144915104, "learning_rate": 9.997270589920399e-06, "loss": 0.0011, "step": 2170 }, { "epoch": 1.6549629910799013, "grad_norm": 1.5917277336120605, "learning_rate": 9.996970899442444e-06, "loss": 0.1614, "step": 2180 }, { "epoch": 1.6625545644334787, "grad_norm": 0.05392596498131752, "learning_rate": 9.996655606550657e-06, "loss": 0.2937, "step": 2190 }, { "epoch": 1.6701461377870563, "grad_norm": 39.229007720947266, "learning_rate": 9.996324712229499e-06, "loss": 0.1227, "step": 2200 }, { "epoch": 1.677737711140634, "grad_norm": 19.827287673950195, "learning_rate": 9.995978217512146e-06, "loss": 0.1703, "step": 2210 }, { "epoch": 1.6853292844942114, "grad_norm": 0.007869013585150242, "learning_rate": 9.995616123480485e-06, "loss": 0.298, "step": 2220 }, { "epoch": 1.692920857847789, "grad_norm": 17.308448791503906, "learning_rate": 9.99523843126511e-06, "loss": 0.2699, "step": 2230 }, { "epoch": 1.7005124312013664, "grad_norm": 0.07290565222501755, "learning_rate": 9.994845142045315e-06, "loss": 0.0798, "step": 2240 }, { "epoch": 1.708104004554944, "grad_norm": 0.0642884150147438, "learning_rate": 9.994436257049098e-06, "loss": 0.3115, "step": 2250 }, { "epoch": 1.7156955779085217, "grad_norm": 3.773754835128784, "learning_rate": 9.994011777553152e-06, "loss": 0.1151, "step": 2260 }, { "epoch": 1.723287151262099, "grad_norm": 12.578306198120117, "learning_rate": 9.99357170488286e-06, "loss": 0.2351, "step": 2270 }, { "epoch": 1.7308787246156765, "grad_norm": 0.12735772132873535, "learning_rate": 9.993116040412289e-06, "loss": 0.2368, "step": 2280 }, { "epoch": 1.7384702979692541, "grad_norm": 37.49304962158203, "learning_rate": 9.9926447855642e-06, "loss": 0.1451, "step": 2290 }, { "epoch": 1.7460618713228317, "grad_norm": 7.337117671966553, "learning_rate": 9.992157941810027e-06, "loss": 0.2029, "step": 2300 }, { "epoch": 1.7536534446764092, "grad_norm": 93.44843292236328, "learning_rate": 9.991655510669875e-06, "loss": 0.2177, "step": 2310 }, { "epoch": 1.7612450180299866, "grad_norm": 6.563670635223389, "learning_rate": 9.991137493712524e-06, "loss": 0.0768, "step": 2320 }, { "epoch": 1.7688365913835642, "grad_norm": 0.021621128544211388, "learning_rate": 9.990603892555417e-06, "loss": 0.1178, "step": 2330 }, { "epoch": 1.7764281647371418, "grad_norm": 0.022252781316637993, "learning_rate": 9.990054708864655e-06, "loss": 0.1944, "step": 2340 }, { "epoch": 1.7840197380907195, "grad_norm": 21.766817092895508, "learning_rate": 9.989489944355e-06, "loss": 0.355, "step": 2350 }, { "epoch": 1.7916113114442969, "grad_norm": 0.05736351013183594, "learning_rate": 9.988909600789851e-06, "loss": 0.1318, "step": 2360 }, { "epoch": 1.7992028847978743, "grad_norm": 44.977779388427734, "learning_rate": 9.988313679981263e-06, "loss": 0.0222, "step": 2370 }, { "epoch": 1.806794458151452, "grad_norm": 0.016255084425210953, "learning_rate": 9.987702183789922e-06, "loss": 0.1285, "step": 2380 }, { "epoch": 1.8143860315050295, "grad_norm": 0.5945267081260681, "learning_rate": 9.987075114125148e-06, "loss": 0.3838, "step": 2390 }, { "epoch": 1.821977604858607, "grad_norm": 0.004704204387962818, "learning_rate": 9.986432472944887e-06, "loss": 0.1587, "step": 2400 }, { "epoch": 1.8295691782121843, "grad_norm": 0.07433657348155975, "learning_rate": 9.985774262255708e-06, "loss": 0.1604, "step": 2410 }, { "epoch": 1.837160751565762, "grad_norm": 0.08134903013706207, "learning_rate": 9.985100484112786e-06, "loss": 0.2395, "step": 2420 }, { "epoch": 1.8447523249193396, "grad_norm": 0.5896629095077515, "learning_rate": 9.984411140619914e-06, "loss": 0.0397, "step": 2430 }, { "epoch": 1.852343898272917, "grad_norm": 0.0015955844428390265, "learning_rate": 9.983706233929477e-06, "loss": 0.2479, "step": 2440 }, { "epoch": 1.8599354716264946, "grad_norm": 12.32898998260498, "learning_rate": 9.982985766242458e-06, "loss": 0.071, "step": 2450 }, { "epoch": 1.867527044980072, "grad_norm": 0.17913532257080078, "learning_rate": 9.98224973980843e-06, "loss": 0.0426, "step": 2460 }, { "epoch": 1.8751186183336497, "grad_norm": 0.06611054390668869, "learning_rate": 9.981498156925539e-06, "loss": 0.3534, "step": 2470 }, { "epoch": 1.8827101916872273, "grad_norm": 2.170029640197754, "learning_rate": 9.98073101994051e-06, "loss": 0.1845, "step": 2480 }, { "epoch": 1.8903017650408047, "grad_norm": 3.257478952407837, "learning_rate": 9.979948331248633e-06, "loss": 0.0038, "step": 2490 }, { "epoch": 1.8978933383943821, "grad_norm": 138.6713409423828, "learning_rate": 9.979150093293753e-06, "loss": 0.1855, "step": 2500 }, { "epoch": 1.9054849117479598, "grad_norm": 0.7939999103546143, "learning_rate": 9.978336308568266e-06, "loss": 0.2101, "step": 2510 }, { "epoch": 1.9130764851015374, "grad_norm": 14.836468696594238, "learning_rate": 9.977506979613118e-06, "loss": 0.2692, "step": 2520 }, { "epoch": 1.9206680584551148, "grad_norm": 0.4420275390148163, "learning_rate": 9.97666210901778e-06, "loss": 0.0356, "step": 2530 }, { "epoch": 1.9282596318086922, "grad_norm": 4.923569679260254, "learning_rate": 9.975801699420256e-06, "loss": 0.1263, "step": 2540 }, { "epoch": 1.9358512051622698, "grad_norm": 0.01419526245445013, "learning_rate": 9.974925753507066e-06, "loss": 0.0735, "step": 2550 }, { "epoch": 1.9434427785158475, "grad_norm": 72.68999481201172, "learning_rate": 9.974034274013242e-06, "loss": 0.0418, "step": 2560 }, { "epoch": 1.951034351869425, "grad_norm": 0.0027209515683352947, "learning_rate": 9.973127263722317e-06, "loss": 0.0042, "step": 2570 }, { "epoch": 1.9586259252230025, "grad_norm": 0.015417971648275852, "learning_rate": 9.972204725466316e-06, "loss": 0.2174, "step": 2580 }, { "epoch": 1.96621749857658, "grad_norm": 0.013561515137553215, "learning_rate": 9.971266662125749e-06, "loss": 0.0808, "step": 2590 }, { "epoch": 1.9738090719301575, "grad_norm": 72.93014526367188, "learning_rate": 9.9703130766296e-06, "loss": 0.2353, "step": 2600 }, { "epoch": 1.9814006452837352, "grad_norm": 0.028727278113365173, "learning_rate": 9.96934397195532e-06, "loss": 0.0344, "step": 2610 }, { "epoch": 1.9889922186373126, "grad_norm": 6.5093770027160645, "learning_rate": 9.96835935112882e-06, "loss": 0.3215, "step": 2620 }, { "epoch": 1.99658379199089, "grad_norm": 45.58213806152344, "learning_rate": 9.96735921722445e-06, "loss": 0.4849, "step": 2630 }, { "epoch": 1.9996204213323212, "eval_f1": 0.9241639816476168, "eval_loss": 0.15625236928462982, "eval_precision": 0.9250778152019562, "eval_recall": 0.9241274658573596, "eval_runtime": 75.5915, "eval_samples_per_second": 17.436, "eval_steps_per_second": 17.436, "step": 2634 }, { "epoch": 2.0041753653444676, "grad_norm": 0.07899657636880875, "learning_rate": 9.966343573365005e-06, "loss": 0.0937, "step": 2640 }, { "epoch": 2.0117669386980452, "grad_norm": 1.1364494562149048, "learning_rate": 9.965312422721705e-06, "loss": 0.0372, "step": 2650 }, { "epoch": 2.019358512051623, "grad_norm": 0.009463181719183922, "learning_rate": 9.964265768514189e-06, "loss": 0.1315, "step": 2660 }, { "epoch": 2.0269500854052, "grad_norm": 152.41160583496094, "learning_rate": 9.963203614010502e-06, "loss": 0.1601, "step": 2670 }, { "epoch": 2.0345416587587777, "grad_norm": 12.7033109664917, "learning_rate": 9.962125962527088e-06, "loss": 0.1492, "step": 2680 }, { "epoch": 2.0421332321123553, "grad_norm": 0.1103023886680603, "learning_rate": 9.961032817428779e-06, "loss": 0.044, "step": 2690 }, { "epoch": 2.049724805465933, "grad_norm": 0.04437507316470146, "learning_rate": 9.959924182128784e-06, "loss": 0.2004, "step": 2700 }, { "epoch": 2.05731637881951, "grad_norm": 0.016279350966215134, "learning_rate": 9.958800060088675e-06, "loss": 0.0789, "step": 2710 }, { "epoch": 2.0649079521730878, "grad_norm": 0.06195428967475891, "learning_rate": 9.957660454818385e-06, "loss": 0.1212, "step": 2720 }, { "epoch": 2.0724995255266654, "grad_norm": 0.07117705792188644, "learning_rate": 9.956505369876187e-06, "loss": 0.1124, "step": 2730 }, { "epoch": 2.080091098880243, "grad_norm": 0.0017620900180190802, "learning_rate": 9.955334808868686e-06, "loss": 0.2135, "step": 2740 }, { "epoch": 2.0876826722338206, "grad_norm": 0.0784306600689888, "learning_rate": 9.954148775450816e-06, "loss": 0.0047, "step": 2750 }, { "epoch": 2.095274245587398, "grad_norm": 0.014996266923844814, "learning_rate": 9.952947273325815e-06, "loss": 0.0063, "step": 2760 }, { "epoch": 2.1028658189409755, "grad_norm": 3.2599010467529297, "learning_rate": 9.951730306245222e-06, "loss": 0.1602, "step": 2770 }, { "epoch": 2.110457392294553, "grad_norm": 0.016863863915205002, "learning_rate": 9.950497878008865e-06, "loss": 0.0317, "step": 2780 }, { "epoch": 2.1180489656481307, "grad_norm": 15.340392112731934, "learning_rate": 9.949249992464847e-06, "loss": 0.154, "step": 2790 }, { "epoch": 2.125640539001708, "grad_norm": 4.341642379760742, "learning_rate": 9.947986653509531e-06, "loss": 0.0257, "step": 2800 }, { "epoch": 2.1332321123552855, "grad_norm": 1.8507261276245117, "learning_rate": 9.946707865087538e-06, "loss": 0.1434, "step": 2810 }, { "epoch": 2.140823685708863, "grad_norm": 0.16088451445102692, "learning_rate": 9.94541363119172e-06, "loss": 0.0837, "step": 2820 }, { "epoch": 2.148415259062441, "grad_norm": 0.689831018447876, "learning_rate": 9.944103955863162e-06, "loss": 0.4116, "step": 2830 }, { "epoch": 2.1560068324160184, "grad_norm": 1.8963958024978638, "learning_rate": 9.94277884319116e-06, "loss": 0.1837, "step": 2840 }, { "epoch": 2.1635984057695956, "grad_norm": 0.024928750470280647, "learning_rate": 9.941438297313215e-06, "loss": 0.0743, "step": 2850 }, { "epoch": 2.1711899791231732, "grad_norm": 0.006995880510658026, "learning_rate": 9.940082322415008e-06, "loss": 0.0001, "step": 2860 }, { "epoch": 2.178781552476751, "grad_norm": 78.55364227294922, "learning_rate": 9.938710922730404e-06, "loss": 0.1252, "step": 2870 }, { "epoch": 2.1863731258303285, "grad_norm": 0.013810686767101288, "learning_rate": 9.937324102541424e-06, "loss": 0.0243, "step": 2880 }, { "epoch": 2.1939646991839057, "grad_norm": 0.007164845243096352, "learning_rate": 9.935921866178242e-06, "loss": 0.0583, "step": 2890 }, { "epoch": 2.2015562725374833, "grad_norm": 0.0043396539986133575, "learning_rate": 9.934504218019161e-06, "loss": 0.0862, "step": 2900 }, { "epoch": 2.209147845891061, "grad_norm": 0.007671877276152372, "learning_rate": 9.933071162490613e-06, "loss": 0.0016, "step": 2910 }, { "epoch": 2.2167394192446386, "grad_norm": 327.8991394042969, "learning_rate": 9.931622704067133e-06, "loss": 0.1624, "step": 2920 }, { "epoch": 2.224330992598216, "grad_norm": 16.11570167541504, "learning_rate": 9.93015884727135e-06, "loss": 0.2645, "step": 2930 }, { "epoch": 2.2319225659517934, "grad_norm": 0.005082719959318638, "learning_rate": 9.928679596673974e-06, "loss": 0.0002, "step": 2940 }, { "epoch": 2.239514139305371, "grad_norm": 0.01941937580704689, "learning_rate": 9.927184956893778e-06, "loss": 0.0612, "step": 2950 }, { "epoch": 2.2471057126589487, "grad_norm": 19.174551010131836, "learning_rate": 9.925674932597586e-06, "loss": 0.2042, "step": 2960 }, { "epoch": 2.2546972860125263, "grad_norm": 21.23321533203125, "learning_rate": 9.924149528500259e-06, "loss": 0.0703, "step": 2970 }, { "epoch": 2.2622888593661035, "grad_norm": 0.11990063637495041, "learning_rate": 9.922608749364684e-06, "loss": 0.1142, "step": 2980 }, { "epoch": 2.269880432719681, "grad_norm": 0.1152704656124115, "learning_rate": 9.921052600001746e-06, "loss": 0.102, "step": 2990 }, { "epoch": 2.2774720060732587, "grad_norm": 36.8327751159668, "learning_rate": 9.919481085270328e-06, "loss": 0.0215, "step": 3000 }, { "epoch": 2.2850635794268364, "grad_norm": 0.06316674500703812, "learning_rate": 9.917894210077285e-06, "loss": 0.1024, "step": 3010 }, { "epoch": 2.292655152780414, "grad_norm": 0.04541470482945442, "learning_rate": 9.916291979377436e-06, "loss": 0.21, "step": 3020 }, { "epoch": 2.300246726133991, "grad_norm": 2.5551743507385254, "learning_rate": 9.914674398173548e-06, "loss": 0.0009, "step": 3030 }, { "epoch": 2.307838299487569, "grad_norm": 0.0514085479080677, "learning_rate": 9.913041471516311e-06, "loss": 0.0674, "step": 3040 }, { "epoch": 2.3154298728411464, "grad_norm": 0.09069258719682693, "learning_rate": 9.911393204504339e-06, "loss": 0.1548, "step": 3050 }, { "epoch": 2.3230214461947236, "grad_norm": 0.0353839211165905, "learning_rate": 9.909729602284131e-06, "loss": 0.1214, "step": 3060 }, { "epoch": 2.3306130195483012, "grad_norm": 0.006493726279586554, "learning_rate": 9.908050670050081e-06, "loss": 0.0039, "step": 3070 }, { "epoch": 2.338204592901879, "grad_norm": 0.009368511848151684, "learning_rate": 9.906356413044443e-06, "loss": 0.0779, "step": 3080 }, { "epoch": 2.3457961662554565, "grad_norm": 0.011731524951756, "learning_rate": 9.90464683655732e-06, "loss": 0.1077, "step": 3090 }, { "epoch": 2.353387739609034, "grad_norm": 63.11314392089844, "learning_rate": 9.902921945926653e-06, "loss": 0.2824, "step": 3100 }, { "epoch": 2.3609793129626113, "grad_norm": 0.0035196368116885424, "learning_rate": 9.901181746538196e-06, "loss": 0.0024, "step": 3110 }, { "epoch": 2.368570886316189, "grad_norm": 0.0374101847410202, "learning_rate": 9.8994262438255e-06, "loss": 0.012, "step": 3120 }, { "epoch": 2.3761624596697666, "grad_norm": 16.60328483581543, "learning_rate": 9.897833211571187e-06, "loss": 0.466, "step": 3130 }, { "epoch": 2.383754033023344, "grad_norm": 21.628568649291992, "learning_rate": 9.896048647683e-06, "loss": 0.1202, "step": 3140 }, { "epoch": 2.3913456063769214, "grad_norm": 15.491986274719238, "learning_rate": 9.894248796498034e-06, "loss": 0.056, "step": 3150 }, { "epoch": 2.398937179730499, "grad_norm": 0.009366643615067005, "learning_rate": 9.892433663636095e-06, "loss": 0.0003, "step": 3160 }, { "epoch": 2.4065287530840767, "grad_norm": 75.25447082519531, "learning_rate": 9.890603254764708e-06, "loss": 0.1785, "step": 3170 }, { "epoch": 2.4141203264376543, "grad_norm": 0.010000905022025108, "learning_rate": 9.888757575599095e-06, "loss": 0.1125, "step": 3180 }, { "epoch": 2.421711899791232, "grad_norm": 0.21319662034511566, "learning_rate": 9.886896631902156e-06, "loss": 0.0575, "step": 3190 }, { "epoch": 2.429303473144809, "grad_norm": 6.481915473937988, "learning_rate": 9.885020429484457e-06, "loss": 0.2689, "step": 3200 }, { "epoch": 2.4368950464983867, "grad_norm": 0.20284566283226013, "learning_rate": 9.8831289742042e-06, "loss": 0.149, "step": 3210 }, { "epoch": 2.4444866198519644, "grad_norm": 1.3910574913024902, "learning_rate": 9.881222271967224e-06, "loss": 0.0142, "step": 3220 }, { "epoch": 2.452078193205542, "grad_norm": 0.09682253748178482, "learning_rate": 9.879300328726958e-06, "loss": 0.0021, "step": 3230 }, { "epoch": 2.459669766559119, "grad_norm": 0.005042471457272768, "learning_rate": 9.877363150484434e-06, "loss": 0.2168, "step": 3240 }, { "epoch": 2.467261339912697, "grad_norm": 64.47718811035156, "learning_rate": 9.875410743288246e-06, "loss": 0.1994, "step": 3250 }, { "epoch": 2.4748529132662744, "grad_norm": 0.2548009753227234, "learning_rate": 9.873443113234541e-06, "loss": 0.2271, "step": 3260 }, { "epoch": 2.482444486619852, "grad_norm": 0.008805714547634125, "learning_rate": 9.871460266466996e-06, "loss": 0.0827, "step": 3270 }, { "epoch": 2.4900360599734297, "grad_norm": 0.05888598784804344, "learning_rate": 9.8694622091768e-06, "loss": 0.054, "step": 3280 }, { "epoch": 2.497627633327007, "grad_norm": 0.004817333538085222, "learning_rate": 9.867448947602637e-06, "loss": 0.105, "step": 3290 }, { "epoch": 2.5052192066805845, "grad_norm": 0.04850906506180763, "learning_rate": 9.865420488030664e-06, "loss": 0.2363, "step": 3300 }, { "epoch": 2.512810780034162, "grad_norm": 0.14938922226428986, "learning_rate": 9.86337683679449e-06, "loss": 0.1593, "step": 3310 }, { "epoch": 2.5204023533877398, "grad_norm": 18.9013729095459, "learning_rate": 9.861318000275158e-06, "loss": 0.2351, "step": 3320 }, { "epoch": 2.527993926741317, "grad_norm": 0.025823410600423813, "learning_rate": 9.85924398490113e-06, "loss": 0.0022, "step": 3330 }, { "epoch": 2.5355855000948946, "grad_norm": 28.33924674987793, "learning_rate": 9.857154797148255e-06, "loss": 0.2312, "step": 3340 }, { "epoch": 2.543177073448472, "grad_norm": 0.001974069746211171, "learning_rate": 9.855050443539761e-06, "loss": 0.0002, "step": 3350 }, { "epoch": 2.55076864680205, "grad_norm": 21.997047424316406, "learning_rate": 9.852930930646228e-06, "loss": 0.1257, "step": 3360 }, { "epoch": 2.5583602201556275, "grad_norm": 0.48950299620628357, "learning_rate": 9.850796265085567e-06, "loss": 0.0062, "step": 3370 }, { "epoch": 2.5659517935092047, "grad_norm": 8.470258712768555, "learning_rate": 9.848646453523005e-06, "loss": 0.0585, "step": 3380 }, { "epoch": 2.5735433668627823, "grad_norm": 0.11571002751588821, "learning_rate": 9.846481502671056e-06, "loss": 0.0329, "step": 3390 }, { "epoch": 2.58113494021636, "grad_norm": 11.877908706665039, "learning_rate": 9.844301419289511e-06, "loss": 0.2921, "step": 3400 }, { "epoch": 2.588726513569937, "grad_norm": 36.33771896362305, "learning_rate": 9.842106210185403e-06, "loss": 0.2223, "step": 3410 }, { "epoch": 2.5963180869235147, "grad_norm": 2.979523181915283, "learning_rate": 9.839895882212997e-06, "loss": 0.0653, "step": 3420 }, { "epoch": 2.6039096602770924, "grad_norm": 0.013308779336512089, "learning_rate": 9.837670442273768e-06, "loss": 0.0735, "step": 3430 }, { "epoch": 2.61150123363067, "grad_norm": 111.11514282226562, "learning_rate": 9.835429897316367e-06, "loss": 0.1495, "step": 3440 }, { "epoch": 2.6190928069842476, "grad_norm": 0.007320565637201071, "learning_rate": 9.833174254336618e-06, "loss": 0.3018, "step": 3450 }, { "epoch": 2.6266843803378253, "grad_norm": 0.010831023566424847, "learning_rate": 9.830903520377482e-06, "loss": 0.0203, "step": 3460 }, { "epoch": 2.6342759536914024, "grad_norm": 18.389625549316406, "learning_rate": 9.82861770252904e-06, "loss": 0.1973, "step": 3470 }, { "epoch": 2.64186752704498, "grad_norm": 12.364988327026367, "learning_rate": 9.826316807928468e-06, "loss": 0.0988, "step": 3480 }, { "epoch": 2.6494591003985577, "grad_norm": 0.0008839545771479607, "learning_rate": 9.824000843760028e-06, "loss": 0.0552, "step": 3490 }, { "epoch": 2.657050673752135, "grad_norm": 0.028787225484848022, "learning_rate": 9.821669817255021e-06, "loss": 0.1918, "step": 3500 }, { "epoch": 2.6646422471057125, "grad_norm": 0.007524173706769943, "learning_rate": 9.819323735691787e-06, "loss": 0.0056, "step": 3510 }, { "epoch": 2.67223382045929, "grad_norm": 7.9602837562561035, "learning_rate": 9.816962606395668e-06, "loss": 0.1273, "step": 3520 }, { "epoch": 2.6798253938128678, "grad_norm": 15.868315696716309, "learning_rate": 9.814586436738998e-06, "loss": 0.0943, "step": 3530 }, { "epoch": 2.6874169671664454, "grad_norm": 0.2785890996456146, "learning_rate": 9.812195234141064e-06, "loss": 0.1291, "step": 3540 }, { "epoch": 2.695008540520023, "grad_norm": 63.62078857421875, "learning_rate": 9.809789006068097e-06, "loss": 0.0672, "step": 3550 }, { "epoch": 2.7026001138736, "grad_norm": 2.8807220458984375, "learning_rate": 9.807367760033245e-06, "loss": 0.217, "step": 3560 }, { "epoch": 2.710191687227178, "grad_norm": 36.00885009765625, "learning_rate": 9.80493150359654e-06, "loss": 0.1016, "step": 3570 }, { "epoch": 2.7177832605807555, "grad_norm": 0.021623503416776657, "learning_rate": 9.80248024436489e-06, "loss": 0.2195, "step": 3580 }, { "epoch": 2.7253748339343327, "grad_norm": 0.03640507906675339, "learning_rate": 9.800013989992042e-06, "loss": 0.0001, "step": 3590 }, { "epoch": 2.7329664072879103, "grad_norm": 27.120119094848633, "learning_rate": 9.797532748178566e-06, "loss": 0.4964, "step": 3600 }, { "epoch": 2.740557980641488, "grad_norm": 0.08877989649772644, "learning_rate": 9.795036526671828e-06, "loss": 0.0498, "step": 3610 }, { "epoch": 2.7481495539950656, "grad_norm": 0.0727711170911789, "learning_rate": 9.792525333265965e-06, "loss": 0.1452, "step": 3620 }, { "epoch": 2.755741127348643, "grad_norm": 0.21834716200828552, "learning_rate": 9.789999175801866e-06, "loss": 0.1315, "step": 3630 }, { "epoch": 2.763332700702221, "grad_norm": 3.933009147644043, "learning_rate": 9.787458062167135e-06, "loss": 0.0726, "step": 3640 }, { "epoch": 2.770924274055798, "grad_norm": 0.00495730759575963, "learning_rate": 9.784902000296084e-06, "loss": 0.0092, "step": 3650 }, { "epoch": 2.7785158474093756, "grad_norm": 0.06244872510433197, "learning_rate": 9.782330998169695e-06, "loss": 0.2204, "step": 3660 }, { "epoch": 2.7861074207629533, "grad_norm": 0.032471269369125366, "learning_rate": 9.779745063815598e-06, "loss": 0.0887, "step": 3670 }, { "epoch": 2.7936989941165304, "grad_norm": 0.0014243993209674954, "learning_rate": 9.777144205308049e-06, "loss": 0.1105, "step": 3680 }, { "epoch": 2.801290567470108, "grad_norm": 69.43852233886719, "learning_rate": 9.774528430767902e-06, "loss": 0.0603, "step": 3690 }, { "epoch": 2.8088821408236857, "grad_norm": 0.06080542132258415, "learning_rate": 9.771897748362583e-06, "loss": 0.0163, "step": 3700 }, { "epoch": 2.8164737141772633, "grad_norm": 0.09897174686193466, "learning_rate": 9.769252166306066e-06, "loss": 0.1167, "step": 3710 }, { "epoch": 2.824065287530841, "grad_norm": 0.20604291558265686, "learning_rate": 9.766591692858854e-06, "loss": 0.0706, "step": 3720 }, { "epoch": 2.831656860884418, "grad_norm": 32.105499267578125, "learning_rate": 9.763916336327935e-06, "loss": 0.5321, "step": 3730 }, { "epoch": 2.8392484342379958, "grad_norm": 0.00609110202640295, "learning_rate": 9.761226105066778e-06, "loss": 0.0794, "step": 3740 }, { "epoch": 2.8468400075915734, "grad_norm": 0.14252524077892303, "learning_rate": 9.75852100747529e-06, "loss": 0.1037, "step": 3750 }, { "epoch": 2.854431580945151, "grad_norm": 0.0007404695497825742, "learning_rate": 9.7558010519998e-06, "loss": 0.0552, "step": 3760 }, { "epoch": 2.8620231542987282, "grad_norm": 0.007310529239475727, "learning_rate": 9.753066247133025e-06, "loss": 0.009, "step": 3770 }, { "epoch": 2.869614727652306, "grad_norm": 88.26655578613281, "learning_rate": 9.750316601414051e-06, "loss": 0.1008, "step": 3780 }, { "epoch": 2.8772063010058835, "grad_norm": 0.01418048795312643, "learning_rate": 9.7475521234283e-06, "loss": 0.0262, "step": 3790 }, { "epoch": 2.884797874359461, "grad_norm": 0.06487419456243515, "learning_rate": 9.744772821807509e-06, "loss": 0.1206, "step": 3800 }, { "epoch": 2.8923894477130387, "grad_norm": 0.0070535228587687016, "learning_rate": 9.741978705229697e-06, "loss": 0.0897, "step": 3810 }, { "epoch": 2.899981021066616, "grad_norm": 1.5489246845245361, "learning_rate": 9.739169782419143e-06, "loss": 0.0008, "step": 3820 }, { "epoch": 2.9075725944201936, "grad_norm": 0.001165062771178782, "learning_rate": 9.736346062146356e-06, "loss": 0.0239, "step": 3830 }, { "epoch": 2.915164167773771, "grad_norm": 0.0013667664024978876, "learning_rate": 9.733507553228045e-06, "loss": 0.0017, "step": 3840 }, { "epoch": 2.9227557411273484, "grad_norm": 0.004272387828677893, "learning_rate": 9.7306542645271e-06, "loss": 0.1874, "step": 3850 }, { "epoch": 2.930347314480926, "grad_norm": 0.032470703125, "learning_rate": 9.727786204952554e-06, "loss": 0.0128, "step": 3860 }, { "epoch": 2.9379388878345036, "grad_norm": 0.010683764703571796, "learning_rate": 9.724903383459566e-06, "loss": 0.064, "step": 3870 }, { "epoch": 2.9455304611880813, "grad_norm": 11.981929779052734, "learning_rate": 9.722005809049382e-06, "loss": 0.2962, "step": 3880 }, { "epoch": 2.953122034541659, "grad_norm": 7.638548374176025, "learning_rate": 9.719093490769315e-06, "loss": 0.2084, "step": 3890 }, { "epoch": 2.9607136078952365, "grad_norm": 0.0027020114939659834, "learning_rate": 9.71616643771271e-06, "loss": 0.0521, "step": 3900 }, { "epoch": 2.9683051812488137, "grad_norm": 0.041696127504110336, "learning_rate": 9.713224659018927e-06, "loss": 0.1488, "step": 3910 }, { "epoch": 2.9758967546023913, "grad_norm": 276.02947998046875, "learning_rate": 9.710268163873298e-06, "loss": 0.1649, "step": 3920 }, { "epoch": 2.983488327955969, "grad_norm": 4.512789726257324, "learning_rate": 9.707296961507107e-06, "loss": 0.0364, "step": 3930 }, { "epoch": 2.991079901309546, "grad_norm": 0.07038887590169907, "learning_rate": 9.70431106119756e-06, "loss": 0.046, "step": 3940 }, { "epoch": 2.998671474663124, "grad_norm": 52.16018295288086, "learning_rate": 9.701310472267757e-06, "loss": 0.1439, "step": 3950 }, { "epoch": 2.999430631998482, "eval_f1": 0.9468203897167411, "eval_loss": 0.17982631921768188, "eval_precision": 0.9451027269774426, "eval_recall": 0.9468892261001517, "eval_runtime": 75.734, "eval_samples_per_second": 17.403, "eval_steps_per_second": 17.403, "step": 3951 }, { "epoch": 3.0062630480167014, "grad_norm": 0.003164840629324317, "learning_rate": 9.69829520408666e-06, "loss": 0.0687, "step": 3960 }, { "epoch": 3.013854621370279, "grad_norm": 0.005421197507530451, "learning_rate": 9.695265266069066e-06, "loss": 0.1768, "step": 3970 }, { "epoch": 3.0214461947238567, "grad_norm": 0.07668659836053848, "learning_rate": 9.692220667675572e-06, "loss": 0.0092, "step": 3980 }, { "epoch": 3.029037768077434, "grad_norm": 0.0020935048814862967, "learning_rate": 9.689161418412557e-06, "loss": 0.2435, "step": 3990 }, { "epoch": 3.0366293414310115, "grad_norm": 0.012631943449378014, "learning_rate": 9.68608752783214e-06, "loss": 0.0858, "step": 4000 }, { "epoch": 3.044220914784589, "grad_norm": 0.005341747775673866, "learning_rate": 9.682999005532161e-06, "loss": 0.0094, "step": 4010 }, { "epoch": 3.0518124881381667, "grad_norm": 0.02143806405365467, "learning_rate": 9.67989586115614e-06, "loss": 0.0031, "step": 4020 }, { "epoch": 3.0594040614917444, "grad_norm": 13.902883529663086, "learning_rate": 9.67677810439326e-06, "loss": 0.0965, "step": 4030 }, { "epoch": 3.0669956348453216, "grad_norm": 0.20893624424934387, "learning_rate": 9.67364574497832e-06, "loss": 0.107, "step": 4040 }, { "epoch": 3.074587208198899, "grad_norm": 0.18238410353660583, "learning_rate": 9.67049879269172e-06, "loss": 0.001, "step": 4050 }, { "epoch": 3.082178781552477, "grad_norm": 0.022665822878479958, "learning_rate": 9.667337257359425e-06, "loss": 0.1673, "step": 4060 }, { "epoch": 3.0897703549060545, "grad_norm": 10.807044982910156, "learning_rate": 9.664161148852932e-06, "loss": 0.0674, "step": 4070 }, { "epoch": 3.0973619282596316, "grad_norm": 0.0026043581310659647, "learning_rate": 9.660970477089238e-06, "loss": 0.0097, "step": 4080 }, { "epoch": 3.1049535016132093, "grad_norm": 18.194334030151367, "learning_rate": 9.657765252030815e-06, "loss": 0.0064, "step": 4090 }, { "epoch": 3.112545074966787, "grad_norm": 12.572392463684082, "learning_rate": 9.654545483685578e-06, "loss": 0.1343, "step": 4100 }, { "epoch": 3.1201366483203645, "grad_norm": 0.0007624260615557432, "learning_rate": 9.651311182106848e-06, "loss": 0.0325, "step": 4110 }, { "epoch": 3.1277282216739417, "grad_norm": 0.018368422985076904, "learning_rate": 9.648062357393325e-06, "loss": 0.0005, "step": 4120 }, { "epoch": 3.1353197950275193, "grad_norm": 78.8929443359375, "learning_rate": 9.644799019689056e-06, "loss": 0.054, "step": 4130 }, { "epoch": 3.142911368381097, "grad_norm": 0.010049775242805481, "learning_rate": 9.641521179183403e-06, "loss": 0.0157, "step": 4140 }, { "epoch": 3.1505029417346746, "grad_norm": 91.76640319824219, "learning_rate": 9.638228846111011e-06, "loss": 0.1893, "step": 4150 }, { "epoch": 3.1580945150882522, "grad_norm": 0.30123358964920044, "learning_rate": 9.634922030751777e-06, "loss": 0.2819, "step": 4160 }, { "epoch": 3.1656860884418294, "grad_norm": 32.838623046875, "learning_rate": 9.631600743430817e-06, "loss": 0.2494, "step": 4170 }, { "epoch": 3.173277661795407, "grad_norm": 0.1474120020866394, "learning_rate": 9.628264994518431e-06, "loss": 0.0401, "step": 4180 }, { "epoch": 3.1808692351489847, "grad_norm": 0.16810506582260132, "learning_rate": 9.624914794430078e-06, "loss": 0.0668, "step": 4190 }, { "epoch": 3.1884608085025623, "grad_norm": 1.5835288763046265, "learning_rate": 9.621550153626338e-06, "loss": 0.1177, "step": 4200 }, { "epoch": 3.1960523818561395, "grad_norm": 0.00022748277115169913, "learning_rate": 9.618171082612875e-06, "loss": 0.006, "step": 4210 }, { "epoch": 3.203643955209717, "grad_norm": 0.011720534414052963, "learning_rate": 9.614777591940419e-06, "loss": 0.0547, "step": 4220 }, { "epoch": 3.2112355285632947, "grad_norm": 16.759693145751953, "learning_rate": 9.611369692204712e-06, "loss": 0.0687, "step": 4230 }, { "epoch": 3.2188271019168724, "grad_norm": 13.746438026428223, "learning_rate": 9.6079473940465e-06, "loss": 0.1731, "step": 4240 }, { "epoch": 3.22641867527045, "grad_norm": 1.0661725997924805, "learning_rate": 9.604510708151472e-06, "loss": 0.0012, "step": 4250 }, { "epoch": 3.234010248624027, "grad_norm": 0.0051275817677378654, "learning_rate": 9.601059645250253e-06, "loss": 0.1559, "step": 4260 }, { "epoch": 3.241601821977605, "grad_norm": 0.03845924511551857, "learning_rate": 9.59759421611835e-06, "loss": 0.0414, "step": 4270 }, { "epoch": 3.2491933953311825, "grad_norm": 0.2744313180446625, "learning_rate": 9.594114431576133e-06, "loss": 0.2521, "step": 4280 }, { "epoch": 3.25678496868476, "grad_norm": 0.06969039887189865, "learning_rate": 9.590620302488792e-06, "loss": 0.1007, "step": 4290 }, { "epoch": 3.2643765420383373, "grad_norm": 0.044375017285346985, "learning_rate": 9.587111839766303e-06, "loss": 0.1706, "step": 4300 }, { "epoch": 3.271968115391915, "grad_norm": 0.008467442356050014, "learning_rate": 9.583589054363402e-06, "loss": 0.0518, "step": 4310 }, { "epoch": 3.2795596887454925, "grad_norm": 0.006757930386811495, "learning_rate": 9.580051957279545e-06, "loss": 0.1301, "step": 4320 }, { "epoch": 3.28715126209907, "grad_norm": 0.22480565309524536, "learning_rate": 9.57650055955887e-06, "loss": 0.2225, "step": 4330 }, { "epoch": 3.294742835452648, "grad_norm": 0.005938298534601927, "learning_rate": 9.572934872290175e-06, "loss": 0.1615, "step": 4340 }, { "epoch": 3.302334408806225, "grad_norm": 0.031019240617752075, "learning_rate": 9.569354906606864e-06, "loss": 0.0292, "step": 4350 }, { "epoch": 3.3099259821598026, "grad_norm": 0.058189138770103455, "learning_rate": 9.565760673686936e-06, "loss": 0.1437, "step": 4360 }, { "epoch": 3.3175175555133802, "grad_norm": 18.81794548034668, "learning_rate": 9.56215218475293e-06, "loss": 0.1732, "step": 4370 }, { "epoch": 3.325109128866958, "grad_norm": 0.037775713950395584, "learning_rate": 9.558529451071896e-06, "loss": 0.0048, "step": 4380 }, { "epoch": 3.332700702220535, "grad_norm": 0.014422253705561161, "learning_rate": 9.55489248395537e-06, "loss": 0.0021, "step": 4390 }, { "epoch": 3.3402922755741127, "grad_norm": 30.743995666503906, "learning_rate": 9.551241294759322e-06, "loss": 0.238, "step": 4400 }, { "epoch": 3.3478838489276903, "grad_norm": 1.6870224475860596, "learning_rate": 9.547575894884132e-06, "loss": 0.09, "step": 4410 }, { "epoch": 3.355475422281268, "grad_norm": 0.03549875691533089, "learning_rate": 9.54389629577455e-06, "loss": 0.163, "step": 4420 }, { "epoch": 3.3630669956348456, "grad_norm": 0.12179459631443024, "learning_rate": 9.540202508919663e-06, "loss": 0.0025, "step": 4430 }, { "epoch": 3.3706585689884228, "grad_norm": 0.000569705618545413, "learning_rate": 9.536494545852854e-06, "loss": 0.0433, "step": 4440 }, { "epoch": 3.3782501423420004, "grad_norm": 0.0051111155189573765, "learning_rate": 9.532772418151777e-06, "loss": 0.1015, "step": 4450 }, { "epoch": 3.385841715695578, "grad_norm": 0.0955556184053421, "learning_rate": 9.529036137438304e-06, "loss": 0.2303, "step": 4460 }, { "epoch": 3.393433289049155, "grad_norm": 0.02819570153951645, "learning_rate": 9.5252857153785e-06, "loss": 0.0003, "step": 4470 }, { "epoch": 3.401024862402733, "grad_norm": 0.005423153750598431, "learning_rate": 9.521521163682593e-06, "loss": 0.0102, "step": 4480 }, { "epoch": 3.4086164357563105, "grad_norm": 0.8613097667694092, "learning_rate": 9.517742494104918e-06, "loss": 0.0005, "step": 4490 }, { "epoch": 3.416208009109888, "grad_norm": 0.2508643567562103, "learning_rate": 9.513949718443898e-06, "loss": 0.0711, "step": 4500 }, { "epoch": 3.4237995824634657, "grad_norm": 0.026635829359292984, "learning_rate": 9.510142848541998e-06, "loss": 0.0596, "step": 4510 }, { "epoch": 3.431391155817043, "grad_norm": 0.0043787783943116665, "learning_rate": 9.50632189628569e-06, "loss": 0.3671, "step": 4520 }, { "epoch": 3.4389827291706205, "grad_norm": 0.05850038304924965, "learning_rate": 9.502486873605419e-06, "loss": 0.1132, "step": 4530 }, { "epoch": 3.446574302524198, "grad_norm": 157.52146911621094, "learning_rate": 9.49863779247556e-06, "loss": 0.1559, "step": 4540 }, { "epoch": 3.454165875877776, "grad_norm": 0.02441789209842682, "learning_rate": 9.494774664914385e-06, "loss": 0.0658, "step": 4550 }, { "epoch": 3.461757449231353, "grad_norm": 1.3454347848892212, "learning_rate": 9.490897502984028e-06, "loss": 0.0128, "step": 4560 }, { "epoch": 3.4693490225849306, "grad_norm": 0.012022917158901691, "learning_rate": 9.487006318790435e-06, "loss": 0.0266, "step": 4570 }, { "epoch": 3.4769405959385082, "grad_norm": 0.01288307923823595, "learning_rate": 9.483101124483345e-06, "loss": 0.0001, "step": 4580 }, { "epoch": 3.484532169292086, "grad_norm": 26.168624877929688, "learning_rate": 9.479181932256232e-06, "loss": 0.0258, "step": 4590 }, { "epoch": 3.4921237426456635, "grad_norm": 0.004901974927634001, "learning_rate": 9.475248754346282e-06, "loss": 0.1046, "step": 4600 }, { "epoch": 3.4997153159992407, "grad_norm": 0.001919193658977747, "learning_rate": 9.471301603034353e-06, "loss": 0.0766, "step": 4610 }, { "epoch": 3.5073068893528183, "grad_norm": 0.030080076307058334, "learning_rate": 9.467340490644923e-06, "loss": 0.0022, "step": 4620 }, { "epoch": 3.514898462706396, "grad_norm": 0.041573066264390945, "learning_rate": 9.463365429546073e-06, "loss": 0.0357, "step": 4630 }, { "epoch": 3.5224900360599736, "grad_norm": 30.251873016357422, "learning_rate": 9.459376432149429e-06, "loss": 0.0533, "step": 4640 }, { "epoch": 3.5300816094135508, "grad_norm": 58.92287826538086, "learning_rate": 9.455373510910135e-06, "loss": 0.1241, "step": 4650 }, { "epoch": 3.5376731827671284, "grad_norm": 0.015299913473427296, "learning_rate": 9.45135667832681e-06, "loss": 0.0672, "step": 4660 }, { "epoch": 3.545264756120706, "grad_norm": 0.024773746728897095, "learning_rate": 9.447325946941509e-06, "loss": 0.0002, "step": 4670 }, { "epoch": 3.5528563294742836, "grad_norm": 0.0013335061958059669, "learning_rate": 9.443281329339682e-06, "loss": 0.0002, "step": 4680 }, { "epoch": 3.5604479028278613, "grad_norm": 0.003542415564879775, "learning_rate": 9.439222838150141e-06, "loss": 0.0053, "step": 4690 }, { "epoch": 3.5680394761814385, "grad_norm": 0.004198325797915459, "learning_rate": 9.435150486045019e-06, "loss": 0.0021, "step": 4700 }, { "epoch": 3.575631049535016, "grad_norm": 0.012465923093259335, "learning_rate": 9.431064285739717e-06, "loss": 0.391, "step": 4710 }, { "epoch": 3.5832226228885937, "grad_norm": 19.51753044128418, "learning_rate": 9.426964249992885e-06, "loss": 0.0163, "step": 4720 }, { "epoch": 3.5908141962421714, "grad_norm": 15.74682903289795, "learning_rate": 9.42285039160637e-06, "loss": 0.1393, "step": 4730 }, { "epoch": 3.5984057695957485, "grad_norm": 0.001853258814662695, "learning_rate": 9.418722723425179e-06, "loss": 0.1333, "step": 4740 }, { "epoch": 3.605997342949326, "grad_norm": 0.00429703202098608, "learning_rate": 9.414581258337433e-06, "loss": 0.041, "step": 4750 }, { "epoch": 3.613588916302904, "grad_norm": 0.019961683079600334, "learning_rate": 9.410426009274343e-06, "loss": 0.0041, "step": 4760 }, { "epoch": 3.6211804896564814, "grad_norm": 0.003665096592158079, "learning_rate": 9.406256989210146e-06, "loss": 0.1252, "step": 4770 }, { "epoch": 3.628772063010059, "grad_norm": 59.87676239013672, "learning_rate": 9.402074211162086e-06, "loss": 0.2175, "step": 4780 }, { "epoch": 3.6363636363636362, "grad_norm": 0.0013629102613776922, "learning_rate": 9.397877688190362e-06, "loss": 0.026, "step": 4790 }, { "epoch": 3.643955209717214, "grad_norm": 0.004092271439731121, "learning_rate": 9.39366743339809e-06, "loss": 0.0061, "step": 4800 }, { "epoch": 3.6515467830707915, "grad_norm": 0.06597864627838135, "learning_rate": 9.38944345993126e-06, "loss": 0.0974, "step": 4810 }, { "epoch": 3.6591383564243687, "grad_norm": 0.0014479252276942134, "learning_rate": 9.3852057809787e-06, "loss": 0.1248, "step": 4820 }, { "epoch": 3.6667299297779463, "grad_norm": 0.0007850687834434211, "learning_rate": 9.380954409772029e-06, "loss": 0.0674, "step": 4830 }, { "epoch": 3.674321503131524, "grad_norm": 0.009199988096952438, "learning_rate": 9.376689359585623e-06, "loss": 0.0707, "step": 4840 }, { "epoch": 3.6819130764851016, "grad_norm": 0.001353310770355165, "learning_rate": 9.37241064373656e-06, "loss": 0.0001, "step": 4850 }, { "epoch": 3.689504649838679, "grad_norm": 0.0004105101979803294, "learning_rate": 9.368118275584596e-06, "loss": 0.0161, "step": 4860 }, { "epoch": 3.697096223192257, "grad_norm": 0.005007717292755842, "learning_rate": 9.36381226853211e-06, "loss": 0.0854, "step": 4870 }, { "epoch": 3.704687796545834, "grad_norm": 0.001610257662832737, "learning_rate": 9.359492636024067e-06, "loss": 0.0002, "step": 4880 }, { "epoch": 3.7122793698994117, "grad_norm": 0.0029359892942011356, "learning_rate": 9.35515939154798e-06, "loss": 0.0001, "step": 4890 }, { "epoch": 3.7198709432529893, "grad_norm": 0.016431191936135292, "learning_rate": 9.350812548633862e-06, "loss": 0.0407, "step": 4900 }, { "epoch": 3.7274625166065665, "grad_norm": 0.00021083364845253527, "learning_rate": 9.346452120854176e-06, "loss": 0.0001, "step": 4910 }, { "epoch": 3.735054089960144, "grad_norm": 0.0014973161742091179, "learning_rate": 9.342078121823817e-06, "loss": 0.2248, "step": 4920 }, { "epoch": 3.7426456633137217, "grad_norm": 0.01354212500154972, "learning_rate": 9.337690565200042e-06, "loss": 0.07, "step": 4930 }, { "epoch": 3.7502372366672994, "grad_norm": 0.07265155762434006, "learning_rate": 9.333289464682452e-06, "loss": 0.0486, "step": 4940 }, { "epoch": 3.757828810020877, "grad_norm": 0.0004681596765294671, "learning_rate": 9.328874834012925e-06, "loss": 0.0063, "step": 4950 }, { "epoch": 3.7654203833744546, "grad_norm": 0.01314933318644762, "learning_rate": 9.324446686975592e-06, "loss": 0.0853, "step": 4960 }, { "epoch": 3.773011956728032, "grad_norm": 0.00873385276645422, "learning_rate": 9.320005037396787e-06, "loss": 0.0936, "step": 4970 }, { "epoch": 3.7806035300816094, "grad_norm": 10.59278678894043, "learning_rate": 9.315549899145001e-06, "loss": 0.1606, "step": 4980 }, { "epoch": 3.788195103435187, "grad_norm": 0.0031807045452296734, "learning_rate": 9.311081286130846e-06, "loss": 0.1216, "step": 4990 }, { "epoch": 3.7957866767887642, "grad_norm": 0.15921778976917267, "learning_rate": 9.306599212307001e-06, "loss": 0.1834, "step": 5000 }, { "epoch": 3.803378250142342, "grad_norm": 0.24746917188167572, "learning_rate": 9.302103691668182e-06, "loss": 0.0025, "step": 5010 }, { "epoch": 3.8109698234959195, "grad_norm": 23.347986221313477, "learning_rate": 9.297594738251085e-06, "loss": 0.155, "step": 5020 }, { "epoch": 3.818561396849497, "grad_norm": 10.753530502319336, "learning_rate": 9.293072366134353e-06, "loss": 0.1938, "step": 5030 }, { "epoch": 3.8261529702030748, "grad_norm": 11.585359573364258, "learning_rate": 9.288536589438523e-06, "loss": 0.0768, "step": 5040 }, { "epoch": 3.833744543556652, "grad_norm": 0.035775743424892426, "learning_rate": 9.283987422325988e-06, "loss": 0.0124, "step": 5050 }, { "epoch": 3.8413361169102296, "grad_norm": 0.008631790988147259, "learning_rate": 9.279424879000948e-06, "loss": 0.0634, "step": 5060 }, { "epoch": 3.848927690263807, "grad_norm": 8.152615547180176, "learning_rate": 9.274848973709378e-06, "loss": 0.0008, "step": 5070 }, { "epoch": 3.856519263617385, "grad_norm": 0.00742849987000227, "learning_rate": 9.270259720738962e-06, "loss": 0.0023, "step": 5080 }, { "epoch": 3.864110836970962, "grad_norm": 0.00474806921556592, "learning_rate": 9.265657134419068e-06, "loss": 0.0822, "step": 5090 }, { "epoch": 3.8717024103245397, "grad_norm": 0.04680832102894783, "learning_rate": 9.261041229120693e-06, "loss": 0.4435, "step": 5100 }, { "epoch": 3.8792939836781173, "grad_norm": 0.05589527264237404, "learning_rate": 9.25641201925642e-06, "loss": 0.0161, "step": 5110 }, { "epoch": 3.886885557031695, "grad_norm": 0.0864788219332695, "learning_rate": 9.251769519280377e-06, "loss": 0.0042, "step": 5120 }, { "epoch": 3.8944771303852725, "grad_norm": 0.0046981326304376125, "learning_rate": 9.247113743688188e-06, "loss": 0.0202, "step": 5130 }, { "epoch": 3.9020687037388497, "grad_norm": 0.008091968484222889, "learning_rate": 9.242444707016924e-06, "loss": 0.0255, "step": 5140 }, { "epoch": 3.9096602770924274, "grad_norm": 0.016733279451727867, "learning_rate": 9.237762423845067e-06, "loss": 0.0609, "step": 5150 }, { "epoch": 3.917251850446005, "grad_norm": 110.93751525878906, "learning_rate": 9.233066908792459e-06, "loss": 0.0854, "step": 5160 }, { "epoch": 3.9248434237995826, "grad_norm": 0.0014931544428691268, "learning_rate": 9.228358176520256e-06, "loss": 0.5116, "step": 5170 }, { "epoch": 3.93243499715316, "grad_norm": 0.013354528695344925, "learning_rate": 9.22363624173088e-06, "loss": 0.1488, "step": 5180 }, { "epoch": 3.9400265705067374, "grad_norm": 0.00550916837528348, "learning_rate": 9.218901119167983e-06, "loss": 0.3537, "step": 5190 }, { "epoch": 3.947618143860315, "grad_norm": 29.100811004638672, "learning_rate": 9.214152823616385e-06, "loss": 0.2662, "step": 5200 }, { "epoch": 3.9552097172138927, "grad_norm": 0.0014990021008998156, "learning_rate": 9.209391369902048e-06, "loss": 0.2909, "step": 5210 }, { "epoch": 3.9628012905674703, "grad_norm": 0.2769727110862732, "learning_rate": 9.20461677289201e-06, "loss": 0.131, "step": 5220 }, { "epoch": 3.9703928639210475, "grad_norm": 0.04668630287051201, "learning_rate": 9.199829047494351e-06, "loss": 0.001, "step": 5230 }, { "epoch": 3.977984437274625, "grad_norm": 0.005737427622079849, "learning_rate": 9.195028208658143e-06, "loss": 0.1876, "step": 5240 }, { "epoch": 3.9855760106282028, "grad_norm": 0.0012742755934596062, "learning_rate": 9.190214271373399e-06, "loss": 0.0296, "step": 5250 }, { "epoch": 3.99316758398178, "grad_norm": 0.23183897137641907, "learning_rate": 9.185387250671037e-06, "loss": 0.0464, "step": 5260 }, { "epoch": 4.0, "eval_f1": 0.9484240795008525, "eval_loss": 0.1520499438047409, "eval_precision": 0.9488593551067371, "eval_recall": 0.9484066767830045, "eval_runtime": 75.764, "eval_samples_per_second": 17.396, "eval_steps_per_second": 17.396, "step": 5269 }, { "epoch": 4.000759157335358, "grad_norm": 0.015655217692255974, "learning_rate": 9.18054716162282e-06, "loss": 0.0778, "step": 5270 }, { "epoch": 4.008350730688935, "grad_norm": 51.39549255371094, "learning_rate": 9.175694019341321e-06, "loss": 0.1821, "step": 5280 }, { "epoch": 4.015942304042513, "grad_norm": 20.591053009033203, "learning_rate": 9.170827838979864e-06, "loss": 0.0411, "step": 5290 }, { "epoch": 4.0235338773960905, "grad_norm": 0.00464022858068347, "learning_rate": 9.165948635732487e-06, "loss": 0.024, "step": 5300 }, { "epoch": 4.031125450749668, "grad_norm": 0.9038947820663452, "learning_rate": 9.161056424833888e-06, "loss": 0.1133, "step": 5310 }, { "epoch": 4.038717024103246, "grad_norm": 104.494384765625, "learning_rate": 9.156151221559384e-06, "loss": 0.0752, "step": 5320 }, { "epoch": 4.046308597456823, "grad_norm": 0.003295379225164652, "learning_rate": 9.151233041224851e-06, "loss": 0.0697, "step": 5330 }, { "epoch": 4.0539001708104, "grad_norm": 0.0672566145658493, "learning_rate": 9.146301899186696e-06, "loss": 0.0149, "step": 5340 }, { "epoch": 4.061491744163978, "grad_norm": 0.020139316096901894, "learning_rate": 9.141357810841785e-06, "loss": 0.0004, "step": 5350 }, { "epoch": 4.069083317517555, "grad_norm": 0.18405619263648987, "learning_rate": 9.136400791627414e-06, "loss": 0.0003, "step": 5360 }, { "epoch": 4.076674890871133, "grad_norm": 0.011098051443696022, "learning_rate": 9.131430857021252e-06, "loss": 0.1502, "step": 5370 }, { "epoch": 4.084266464224711, "grad_norm": 0.0007754967082291842, "learning_rate": 9.126448022541296e-06, "loss": 0.1435, "step": 5380 }, { "epoch": 4.091858037578288, "grad_norm": 0.059689611196517944, "learning_rate": 9.121452303745823e-06, "loss": 0.2681, "step": 5390 }, { "epoch": 4.099449610931866, "grad_norm": 23.187213897705078, "learning_rate": 9.116443716233336e-06, "loss": 0.0408, "step": 5400 }, { "epoch": 4.1070411842854435, "grad_norm": 0.022440658882260323, "learning_rate": 9.111422275642518e-06, "loss": 0.0499, "step": 5410 }, { "epoch": 4.11463275763902, "grad_norm": 0.04940136522054672, "learning_rate": 9.10638799765219e-06, "loss": 0.0007, "step": 5420 }, { "epoch": 4.122224330992598, "grad_norm": 0.0109120924025774, "learning_rate": 9.101340897981247e-06, "loss": 0.0577, "step": 5430 }, { "epoch": 4.1298159043461755, "grad_norm": 15.833015441894531, "learning_rate": 9.096280992388629e-06, "loss": 0.0016, "step": 5440 }, { "epoch": 4.137407477699753, "grad_norm": 0.002290463075041771, "learning_rate": 9.091208296673253e-06, "loss": 0.0022, "step": 5450 }, { "epoch": 4.144999051053331, "grad_norm": 0.006408984772861004, "learning_rate": 9.086122826673976e-06, "loss": 0.0004, "step": 5460 }, { "epoch": 4.152590624406908, "grad_norm": 0.04329880699515343, "learning_rate": 9.081024598269537e-06, "loss": 0.0001, "step": 5470 }, { "epoch": 4.160182197760486, "grad_norm": 0.0005604320904240012, "learning_rate": 9.075913627378515e-06, "loss": 0.1444, "step": 5480 }, { "epoch": 4.167773771114064, "grad_norm": 0.0035607500467449427, "learning_rate": 9.070789929959273e-06, "loss": 0.0705, "step": 5490 }, { "epoch": 4.175365344467641, "grad_norm": 21.509424209594727, "learning_rate": 9.065653522009914e-06, "loss": 0.0963, "step": 5500 }, { "epoch": 4.182956917821218, "grad_norm": 0.040827080607414246, "learning_rate": 9.060504419568226e-06, "loss": 0.2367, "step": 5510 }, { "epoch": 4.190548491174796, "grad_norm": 0.03268290311098099, "learning_rate": 9.055342638711636e-06, "loss": 0.1356, "step": 5520 }, { "epoch": 4.198140064528373, "grad_norm": 0.02690727449953556, "learning_rate": 9.050168195557152e-06, "loss": 0.1927, "step": 5530 }, { "epoch": 4.205731637881951, "grad_norm": 0.0010843976633623242, "learning_rate": 9.044981106261327e-06, "loss": 0.03, "step": 5540 }, { "epoch": 4.2133232112355286, "grad_norm": 0.017938513308763504, "learning_rate": 9.039781387020195e-06, "loss": 0.0011, "step": 5550 }, { "epoch": 4.220914784589106, "grad_norm": 0.11831680685281754, "learning_rate": 9.034569054069222e-06, "loss": 0.0028, "step": 5560 }, { "epoch": 4.228506357942684, "grad_norm": 0.0017340222839266062, "learning_rate": 9.029344123683269e-06, "loss": 0.0004, "step": 5570 }, { "epoch": 4.236097931296261, "grad_norm": 45.62750244140625, "learning_rate": 9.024106612176519e-06, "loss": 0.199, "step": 5580 }, { "epoch": 4.243689504649839, "grad_norm": 0.00023749677347950637, "learning_rate": 9.019382108477498e-06, "loss": 0.0737, "step": 5590 }, { "epoch": 4.251281078003416, "grad_norm": 0.0017125029116868973, "learning_rate": 9.014120737927479e-06, "loss": 0.0038, "step": 5600 }, { "epoch": 4.258872651356993, "grad_norm": 0.005647186189889908, "learning_rate": 9.008846833789777e-06, "loss": 0.0524, "step": 5610 }, { "epoch": 4.266464224710571, "grad_norm": 0.02812052331864834, "learning_rate": 9.003560412531492e-06, "loss": 0.0008, "step": 5620 }, { "epoch": 4.274055798064149, "grad_norm": 0.004697522614151239, "learning_rate": 8.99826149065881e-06, "loss": 0.022, "step": 5630 }, { "epoch": 4.281647371417726, "grad_norm": 0.000999168842099607, "learning_rate": 8.992950084716952e-06, "loss": 0.0255, "step": 5640 }, { "epoch": 4.289238944771304, "grad_norm": 0.00024819112149998546, "learning_rate": 8.987626211290112e-06, "loss": 0.1814, "step": 5650 }, { "epoch": 4.296830518124882, "grad_norm": 15.028079986572266, "learning_rate": 8.982289887001419e-06, "loss": 0.0483, "step": 5660 }, { "epoch": 4.304422091478459, "grad_norm": 0.012629321776330471, "learning_rate": 8.976941128512873e-06, "loss": 0.0727, "step": 5670 }, { "epoch": 4.312013664832037, "grad_norm": 0.02232271246612072, "learning_rate": 8.9715799525253e-06, "loss": 0.1076, "step": 5680 }, { "epoch": 4.319605238185614, "grad_norm": 0.013221162371337414, "learning_rate": 8.966206375778302e-06, "loss": 0.1304, "step": 5690 }, { "epoch": 4.327196811539191, "grad_norm": 20.240745544433594, "learning_rate": 8.960820415050193e-06, "loss": 0.0818, "step": 5700 }, { "epoch": 4.334788384892769, "grad_norm": 0.9472859501838684, "learning_rate": 8.955422087157962e-06, "loss": 0.0875, "step": 5710 }, { "epoch": 4.3423799582463465, "grad_norm": 0.24365593492984772, "learning_rate": 8.950011408957206e-06, "loss": 0.0052, "step": 5720 }, { "epoch": 4.349971531599924, "grad_norm": 0.5765083432197571, "learning_rate": 8.944588397342093e-06, "loss": 0.3057, "step": 5730 }, { "epoch": 4.357563104953502, "grad_norm": 36.48699951171875, "learning_rate": 8.939153069245291e-06, "loss": 0.1687, "step": 5740 }, { "epoch": 4.365154678307079, "grad_norm": 0.011977112852036953, "learning_rate": 8.933705441637931e-06, "loss": 0.0129, "step": 5750 }, { "epoch": 4.372746251660657, "grad_norm": 0.049162607640028, "learning_rate": 8.928245531529546e-06, "loss": 0.0747, "step": 5760 }, { "epoch": 4.380337825014234, "grad_norm": 0.006424940191209316, "learning_rate": 8.922773355968018e-06, "loss": 0.0001, "step": 5770 }, { "epoch": 4.387929398367811, "grad_norm": 0.0021049147471785545, "learning_rate": 8.91728893203953e-06, "loss": 0.0011, "step": 5780 }, { "epoch": 4.395520971721389, "grad_norm": 0.005935146939009428, "learning_rate": 8.911792276868502e-06, "loss": 0.0685, "step": 5790 }, { "epoch": 4.403112545074967, "grad_norm": 0.16192130744457245, "learning_rate": 8.906283407617555e-06, "loss": 0.0789, "step": 5800 }, { "epoch": 4.410704118428544, "grad_norm": 0.0363471657037735, "learning_rate": 8.900762341487439e-06, "loss": 0.0003, "step": 5810 }, { "epoch": 4.418295691782122, "grad_norm": 0.03035406582057476, "learning_rate": 8.895229095716988e-06, "loss": 0.0004, "step": 5820 }, { "epoch": 4.4258872651356995, "grad_norm": 0.0051777479238808155, "learning_rate": 8.889683687583067e-06, "loss": 0.0974, "step": 5830 }, { "epoch": 4.433478838489277, "grad_norm": 0.001428132993169129, "learning_rate": 8.884126134400516e-06, "loss": 0.0104, "step": 5840 }, { "epoch": 4.441070411842855, "grad_norm": 0.029337646439671516, "learning_rate": 8.8785564535221e-06, "loss": 0.1961, "step": 5850 }, { "epoch": 4.448661985196432, "grad_norm": 103.57210540771484, "learning_rate": 8.872974662338443e-06, "loss": 0.0941, "step": 5860 }, { "epoch": 4.456253558550009, "grad_norm": 0.006421659607440233, "learning_rate": 8.86738077827799e-06, "loss": 0.0586, "step": 5870 }, { "epoch": 4.463845131903587, "grad_norm": 0.21757641434669495, "learning_rate": 8.861774818806939e-06, "loss": 0.1107, "step": 5880 }, { "epoch": 4.471436705257164, "grad_norm": 0.2700095474720001, "learning_rate": 8.856156801429196e-06, "loss": 0.1388, "step": 5890 }, { "epoch": 4.479028278610742, "grad_norm": 0.0029901862144470215, "learning_rate": 8.850526743686314e-06, "loss": 0.1908, "step": 5900 }, { "epoch": 4.48661985196432, "grad_norm": 0.008274559862911701, "learning_rate": 8.844884663157441e-06, "loss": 0.0842, "step": 5910 }, { "epoch": 4.494211425317897, "grad_norm": 0.006725401151925325, "learning_rate": 8.83923057745926e-06, "loss": 0.0003, "step": 5920 }, { "epoch": 4.501802998671475, "grad_norm": 13.423134803771973, "learning_rate": 8.833564504245953e-06, "loss": 0.0658, "step": 5930 }, { "epoch": 4.509394572025053, "grad_norm": 0.047781139612197876, "learning_rate": 8.827886461209114e-06, "loss": 0.0008, "step": 5940 }, { "epoch": 4.516986145378629, "grad_norm": 0.0009586279047653079, "learning_rate": 8.82219646607772e-06, "loss": 0.0003, "step": 5950 }, { "epoch": 4.524577718732207, "grad_norm": 0.07489871978759766, "learning_rate": 8.816494536618069e-06, "loss": 0.0003, "step": 5960 }, { "epoch": 4.532169292085785, "grad_norm": 0.015722280368208885, "learning_rate": 8.810780690633715e-06, "loss": 0.1269, "step": 5970 }, { "epoch": 4.539760865439362, "grad_norm": 0.01760883256793022, "learning_rate": 8.805054945965429e-06, "loss": 0.0659, "step": 5980 }, { "epoch": 4.54735243879294, "grad_norm": 0.03223474696278572, "learning_rate": 8.799317320491125e-06, "loss": 0.0005, "step": 5990 }, { "epoch": 4.5549440121465175, "grad_norm": 0.0017072842456400394, "learning_rate": 8.793567832125823e-06, "loss": 0.1485, "step": 6000 }, { "epoch": 4.562535585500095, "grad_norm": 0.0031113557051867247, "learning_rate": 8.787806498821572e-06, "loss": 0.0058, "step": 6010 }, { "epoch": 4.570127158853673, "grad_norm": 0.016612514853477478, "learning_rate": 8.782033338567414e-06, "loss": 0.0294, "step": 6020 }, { "epoch": 4.57771873220725, "grad_norm": 0.010033627972006798, "learning_rate": 8.776248369389319e-06, "loss": 0.064, "step": 6030 }, { "epoch": 4.585310305560828, "grad_norm": 0.007523770444095135, "learning_rate": 8.770451609350123e-06, "loss": 0.1784, "step": 6040 }, { "epoch": 4.592901878914405, "grad_norm": 0.0006488583167083561, "learning_rate": 8.764643076549481e-06, "loss": 0.0001, "step": 6050 }, { "epoch": 4.600493452267982, "grad_norm": 49.0224494934082, "learning_rate": 8.75882278912381e-06, "loss": 0.1479, "step": 6060 }, { "epoch": 4.60808502562156, "grad_norm": 0.05112855136394501, "learning_rate": 8.752990765246222e-06, "loss": 0.0742, "step": 6070 }, { "epoch": 4.615676598975138, "grad_norm": 0.007768516894429922, "learning_rate": 8.747147023126486e-06, "loss": 0.0547, "step": 6080 }, { "epoch": 4.623268172328715, "grad_norm": 0.03929920494556427, "learning_rate": 8.741291581010945e-06, "loss": 0.0005, "step": 6090 }, { "epoch": 4.630859745682293, "grad_norm": 0.0333462730050087, "learning_rate": 8.735424457182483e-06, "loss": 0.0912, "step": 6100 }, { "epoch": 4.6384513190358705, "grad_norm": 0.0021920499857515097, "learning_rate": 8.729545669960459e-06, "loss": 0.0025, "step": 6110 }, { "epoch": 4.646042892389447, "grad_norm": 0.24167831242084503, "learning_rate": 8.723655237700646e-06, "loss": 0.0184, "step": 6120 }, { "epoch": 4.653634465743025, "grad_norm": 0.01909787394106388, "learning_rate": 8.71775317879518e-06, "loss": 0.0001, "step": 6130 }, { "epoch": 4.6612260390966025, "grad_norm": 97.6840591430664, "learning_rate": 8.711839511672497e-06, "loss": 0.0578, "step": 6140 }, { "epoch": 4.66881761245018, "grad_norm": 0.000244935043156147, "learning_rate": 8.705914254797283e-06, "loss": 0.1423, "step": 6150 }, { "epoch": 4.676409185803758, "grad_norm": 0.0006741081597283483, "learning_rate": 8.699977426670403e-06, "loss": 0.0306, "step": 6160 }, { "epoch": 4.684000759157335, "grad_norm": 0.001535810879431665, "learning_rate": 8.69402904582886e-06, "loss": 0.0496, "step": 6170 }, { "epoch": 4.691592332510913, "grad_norm": 0.4821704030036926, "learning_rate": 8.688069130845725e-06, "loss": 0.0443, "step": 6180 }, { "epoch": 4.699183905864491, "grad_norm": 0.002279536332935095, "learning_rate": 8.682097700330086e-06, "loss": 0.0222, "step": 6190 }, { "epoch": 4.706775479218068, "grad_norm": 0.009520245715975761, "learning_rate": 8.67611477292698e-06, "loss": 0.1731, "step": 6200 }, { "epoch": 4.714367052571646, "grad_norm": 0.1851215660572052, "learning_rate": 8.67012036731735e-06, "loss": 0.0629, "step": 6210 }, { "epoch": 4.721958625925223, "grad_norm": 0.12576204538345337, "learning_rate": 8.664114502217975e-06, "loss": 0.0448, "step": 6220 }, { "epoch": 4.7295501992788, "grad_norm": 0.015547769144177437, "learning_rate": 8.65809719638141e-06, "loss": 0.0147, "step": 6230 }, { "epoch": 4.737141772632378, "grad_norm": 0.2670181095600128, "learning_rate": 8.65206846859594e-06, "loss": 0.0005, "step": 6240 }, { "epoch": 4.7447333459859555, "grad_norm": 0.028395511209964752, "learning_rate": 8.646028337685509e-06, "loss": 0.05, "step": 6250 }, { "epoch": 4.752324919339533, "grad_norm": 0.018742332234978676, "learning_rate": 8.639976822509666e-06, "loss": 0.2398, "step": 6260 }, { "epoch": 4.759916492693111, "grad_norm": 12.270938873291016, "learning_rate": 8.633913941963507e-06, "loss": 0.313, "step": 6270 }, { "epoch": 4.767508066046688, "grad_norm": 0.07293716818094254, "learning_rate": 8.627839714977618e-06, "loss": 0.0008, "step": 6280 }, { "epoch": 4.775099639400266, "grad_norm": 0.06347032636404037, "learning_rate": 8.621754160518005e-06, "loss": 0.0221, "step": 6290 }, { "epoch": 4.782691212753843, "grad_norm": 0.0011452403850853443, "learning_rate": 8.615657297586051e-06, "loss": 0.1013, "step": 6300 }, { "epoch": 4.79028278610742, "grad_norm": 0.0021203244104981422, "learning_rate": 8.609549145218442e-06, "loss": 0.0007, "step": 6310 }, { "epoch": 4.797874359460998, "grad_norm": 0.006574318744242191, "learning_rate": 8.603429722487117e-06, "loss": 0.0725, "step": 6320 }, { "epoch": 4.805465932814576, "grad_norm": 0.00014791313151363283, "learning_rate": 8.597299048499206e-06, "loss": 0.0532, "step": 6330 }, { "epoch": 4.813057506168153, "grad_norm": 0.12207093834877014, "learning_rate": 8.591157142396966e-06, "loss": 0.1137, "step": 6340 }, { "epoch": 4.820649079521731, "grad_norm": 0.027442127466201782, "learning_rate": 8.58500402335773e-06, "loss": 0.0812, "step": 6350 }, { "epoch": 4.828240652875309, "grad_norm": 0.00018395182269159704, "learning_rate": 8.578839710593836e-06, "loss": 0.1686, "step": 6360 }, { "epoch": 4.835832226228886, "grad_norm": 0.06821048259735107, "learning_rate": 8.57266422335258e-06, "loss": 0.0005, "step": 6370 }, { "epoch": 4.843423799582464, "grad_norm": 9.863347804639488e-05, "learning_rate": 8.56647758091614e-06, "loss": 0.0005, "step": 6380 }, { "epoch": 4.8510153729360415, "grad_norm": 0.0016949453856796026, "learning_rate": 8.560279802601533e-06, "loss": 0.1504, "step": 6390 }, { "epoch": 4.858606946289618, "grad_norm": 0.0009430780191905797, "learning_rate": 8.554070907760544e-06, "loss": 0.0, "step": 6400 }, { "epoch": 4.866198519643196, "grad_norm": 0.02552955597639084, "learning_rate": 8.547850915779662e-06, "loss": 0.0001, "step": 6410 }, { "epoch": 4.8737900929967735, "grad_norm": 0.014719455502927303, "learning_rate": 8.541619846080039e-06, "loss": 0.15, "step": 6420 }, { "epoch": 4.881381666350351, "grad_norm": 0.09882048517465591, "learning_rate": 8.535377718117399e-06, "loss": 0.0569, "step": 6430 }, { "epoch": 4.888973239703929, "grad_norm": 0.22454605996608734, "learning_rate": 8.52912455138201e-06, "loss": 0.1482, "step": 6440 }, { "epoch": 4.896564813057506, "grad_norm": 0.08625132590532303, "learning_rate": 8.52286036539859e-06, "loss": 0.0011, "step": 6450 }, { "epoch": 4.904156386411084, "grad_norm": 0.03739362582564354, "learning_rate": 8.51658517972628e-06, "loss": 0.1778, "step": 6460 }, { "epoch": 4.911747959764662, "grad_norm": 0.21021807193756104, "learning_rate": 8.510299013958559e-06, "loss": 0.0011, "step": 6470 }, { "epoch": 4.919339533118238, "grad_norm": 0.04205634444952011, "learning_rate": 8.504001887723185e-06, "loss": 0.0787, "step": 6480 }, { "epoch": 4.926931106471816, "grad_norm": 0.09222347289323807, "learning_rate": 8.497693820682146e-06, "loss": 0.0006, "step": 6490 }, { "epoch": 4.934522679825394, "grad_norm": 0.1209307536482811, "learning_rate": 8.491374832531591e-06, "loss": 0.053, "step": 6500 }, { "epoch": 4.942114253178971, "grad_norm": 0.009995940141379833, "learning_rate": 8.485044943001763e-06, "loss": 0.0096, "step": 6510 }, { "epoch": 4.949705826532549, "grad_norm": 0.018289346247911453, "learning_rate": 8.47870417185695e-06, "loss": 0.0012, "step": 6520 }, { "epoch": 4.9572973998861265, "grad_norm": 65.71520233154297, "learning_rate": 8.472352538895411e-06, "loss": 0.1783, "step": 6530 }, { "epoch": 4.964888973239704, "grad_norm": 57.22151184082031, "learning_rate": 8.465990063949323e-06, "loss": 0.1034, "step": 6540 }, { "epoch": 4.972480546593282, "grad_norm": 0.003517146920785308, "learning_rate": 8.459616766884713e-06, "loss": 0.0024, "step": 6550 }, { "epoch": 4.980072119946859, "grad_norm": 0.0020259765442460775, "learning_rate": 8.453232667601403e-06, "loss": 0.0001, "step": 6560 }, { "epoch": 4.987663693300436, "grad_norm": 0.007150826510041952, "learning_rate": 8.44683778603294e-06, "loss": 0.1704, "step": 6570 }, { "epoch": 4.995255266654014, "grad_norm": 0.0018830208573490381, "learning_rate": 8.440432142146535e-06, "loss": 0.0349, "step": 6580 }, { "epoch": 4.99981021066616, "eval_f1": 0.9552939310725507, "eval_loss": 0.15884605050086975, "eval_precision": 0.9567644368540595, "eval_recall": 0.9552352048558422, "eval_runtime": 75.7698, "eval_samples_per_second": 17.395, "eval_steps_per_second": 17.395, "step": 6586 }, { "epoch": 5.002846840007591, "grad_norm": 0.002661398844793439, "learning_rate": 8.434015755943013e-06, "loss": 0.0002, "step": 6590 }, { "epoch": 5.010438413361169, "grad_norm": 0.015321805141866207, "learning_rate": 8.427588647456727e-06, "loss": 0.0006, "step": 6600 }, { "epoch": 5.018029986714747, "grad_norm": 0.015539165586233139, "learning_rate": 8.42115083675552e-06, "loss": 0.128, "step": 6610 }, { "epoch": 5.025621560068324, "grad_norm": 5.927582263946533, "learning_rate": 8.414702343940647e-06, "loss": 0.0743, "step": 6620 }, { "epoch": 5.033213133421902, "grad_norm": 0.0004428077954798937, "learning_rate": 8.408243189146714e-06, "loss": 0.0764, "step": 6630 }, { "epoch": 5.0408047067754795, "grad_norm": 13.519503593444824, "learning_rate": 8.401773392541621e-06, "loss": 0.0837, "step": 6640 }, { "epoch": 5.048396280129057, "grad_norm": 0.0011204121401533484, "learning_rate": 8.395292974326497e-06, "loss": 0.0001, "step": 6650 }, { "epoch": 5.055987853482634, "grad_norm": 0.005702109541743994, "learning_rate": 8.388801954735632e-06, "loss": 0.0003, "step": 6660 }, { "epoch": 5.0635794268362115, "grad_norm": 0.009877257980406284, "learning_rate": 8.38230035403642e-06, "loss": 0.0001, "step": 6670 }, { "epoch": 5.071171000189789, "grad_norm": 0.0006185189704410732, "learning_rate": 8.375788192529292e-06, "loss": 0.0002, "step": 6680 }, { "epoch": 5.078762573543367, "grad_norm": 0.0004436051531229168, "learning_rate": 8.369265490547653e-06, "loss": 0.0004, "step": 6690 }, { "epoch": 5.086354146896944, "grad_norm": 0.016778159886598587, "learning_rate": 8.362732268457824e-06, "loss": 0.1505, "step": 6700 }, { "epoch": 5.093945720250522, "grad_norm": 0.13505133986473083, "learning_rate": 8.356188546658966e-06, "loss": 0.0825, "step": 6710 }, { "epoch": 5.1015372936041, "grad_norm": 0.015829697251319885, "learning_rate": 8.34963434558303e-06, "loss": 0.106, "step": 6720 }, { "epoch": 5.109128866957677, "grad_norm": 0.006577119696885347, "learning_rate": 8.343069685694687e-06, "loss": 0.1537, "step": 6730 }, { "epoch": 5.116720440311255, "grad_norm": 0.0571792870759964, "learning_rate": 8.33649458749126e-06, "loss": 0.023, "step": 6740 }, { "epoch": 5.124312013664832, "grad_norm": 0.13444474339485168, "learning_rate": 8.329909071502668e-06, "loss": 0.1881, "step": 6750 }, { "epoch": 5.131903587018409, "grad_norm": 0.011354477144777775, "learning_rate": 8.32331315829136e-06, "loss": 0.2186, "step": 6760 }, { "epoch": 5.139495160371987, "grad_norm": 0.11647947877645493, "learning_rate": 8.31670686845224e-06, "loss": 0.0005, "step": 6770 }, { "epoch": 5.147086733725565, "grad_norm": 0.03318728879094124, "learning_rate": 8.310090222612623e-06, "loss": 0.0004, "step": 6780 }, { "epoch": 5.154678307079142, "grad_norm": 0.0020830295979976654, "learning_rate": 8.303463241432156e-06, "loss": 0.0738, "step": 6790 }, { "epoch": 5.16226988043272, "grad_norm": 0.18546123802661896, "learning_rate": 8.296825945602749e-06, "loss": 0.225, "step": 6800 }, { "epoch": 5.1698614537862975, "grad_norm": 0.013226731680333614, "learning_rate": 8.290178355848528e-06, "loss": 0.0024, "step": 6810 }, { "epoch": 5.177453027139875, "grad_norm": 0.0015887143090367317, "learning_rate": 8.283520492925758e-06, "loss": 0.1161, "step": 6820 }, { "epoch": 5.185044600493452, "grad_norm": 12.341133117675781, "learning_rate": 8.276852377622777e-06, "loss": 0.0333, "step": 6830 }, { "epoch": 5.1926361738470295, "grad_norm": 0.48488712310791016, "learning_rate": 8.270174030759939e-06, "loss": 0.0025, "step": 6840 }, { "epoch": 5.200227747200607, "grad_norm": 0.09974020719528198, "learning_rate": 8.263485473189542e-06, "loss": 0.0003, "step": 6850 }, { "epoch": 5.207819320554185, "grad_norm": 0.005017921794205904, "learning_rate": 8.256786725795767e-06, "loss": 0.0707, "step": 6860 }, { "epoch": 5.215410893907762, "grad_norm": 16.735441207885742, "learning_rate": 8.250077809494612e-06, "loss": 0.1761, "step": 6870 }, { "epoch": 5.22300246726134, "grad_norm": 0.08619498461484909, "learning_rate": 8.243358745233822e-06, "loss": 0.0025, "step": 6880 }, { "epoch": 5.230594040614918, "grad_norm": 0.008258694782853127, "learning_rate": 8.236629553992837e-06, "loss": 0.1096, "step": 6890 }, { "epoch": 5.238185613968495, "grad_norm": 0.032047972083091736, "learning_rate": 8.229890256782705e-06, "loss": 0.0774, "step": 6900 }, { "epoch": 5.245777187322073, "grad_norm": 0.12164535373449326, "learning_rate": 8.223140874646039e-06, "loss": 0.041, "step": 6910 }, { "epoch": 5.25336876067565, "grad_norm": 0.30879223346710205, "learning_rate": 8.216381428656935e-06, "loss": 0.0008, "step": 6920 }, { "epoch": 5.260960334029227, "grad_norm": 0.011329672299325466, "learning_rate": 8.209611939920912e-06, "loss": 0.0507, "step": 6930 }, { "epoch": 5.268551907382805, "grad_norm": 0.0024318841751664877, "learning_rate": 8.202832429574851e-06, "loss": 0.0511, "step": 6940 }, { "epoch": 5.2761434807363825, "grad_norm": 0.06363888084888458, "learning_rate": 8.196042918786923e-06, "loss": 0.0418, "step": 6950 }, { "epoch": 5.28373505408996, "grad_norm": 0.006296386010944843, "learning_rate": 8.189243428756518e-06, "loss": 0.0013, "step": 6960 }, { "epoch": 5.291326627443538, "grad_norm": 1.5055712461471558, "learning_rate": 8.182433980714191e-06, "loss": 0.0003, "step": 6970 }, { "epoch": 5.298918200797115, "grad_norm": 0.04809055104851723, "learning_rate": 8.175614595921589e-06, "loss": 0.0001, "step": 6980 }, { "epoch": 5.306509774150693, "grad_norm": 0.0006017005071043968, "learning_rate": 8.168785295671385e-06, "loss": 0.0001, "step": 6990 }, { "epoch": 5.314101347504271, "grad_norm": 0.05823567882180214, "learning_rate": 8.161946101287205e-06, "loss": 0.1, "step": 7000 }, { "epoch": 5.321692920857847, "grad_norm": 0.21126702427864075, "learning_rate": 8.155097034123582e-06, "loss": 0.0012, "step": 7010 }, { "epoch": 5.329284494211425, "grad_norm": 0.005064593628048897, "learning_rate": 8.148238115565865e-06, "loss": 0.2162, "step": 7020 }, { "epoch": 5.336876067565003, "grad_norm": 0.03429802507162094, "learning_rate": 8.141369367030165e-06, "loss": 0.0068, "step": 7030 }, { "epoch": 5.34446764091858, "grad_norm": 0.019597377628087997, "learning_rate": 8.134490809963285e-06, "loss": 0.0447, "step": 7040 }, { "epoch": 5.352059214272158, "grad_norm": 3.237245559692383, "learning_rate": 8.127602465842656e-06, "loss": 0.0408, "step": 7050 }, { "epoch": 5.3596507876257355, "grad_norm": 0.1109641045331955, "learning_rate": 8.12070435617627e-06, "loss": 0.0041, "step": 7060 }, { "epoch": 5.367242360979313, "grad_norm": 1.6172115802764893, "learning_rate": 8.113796502502605e-06, "loss": 0.0008, "step": 7070 }, { "epoch": 5.374833934332891, "grad_norm": 0.0019253261853009462, "learning_rate": 8.106878926390565e-06, "loss": 0.0106, "step": 7080 }, { "epoch": 5.382425507686468, "grad_norm": 0.010185305029153824, "learning_rate": 8.099951649439415e-06, "loss": 0.17, "step": 7090 }, { "epoch": 5.390017081040045, "grad_norm": 0.00028460906469263136, "learning_rate": 8.093014693278705e-06, "loss": 0.0814, "step": 7100 }, { "epoch": 5.397608654393623, "grad_norm": 0.09348779916763306, "learning_rate": 8.08606807956821e-06, "loss": 0.0562, "step": 7110 }, { "epoch": 5.4052002277472, "grad_norm": 0.01985323429107666, "learning_rate": 8.079111829997861e-06, "loss": 0.0004, "step": 7120 }, { "epoch": 5.412791801100778, "grad_norm": 0.084492027759552, "learning_rate": 8.072145966287668e-06, "loss": 0.0393, "step": 7130 }, { "epoch": 5.420383374454356, "grad_norm": 0.008949169889092445, "learning_rate": 8.06517051018767e-06, "loss": 0.0027, "step": 7140 }, { "epoch": 5.427974947807933, "grad_norm": 0.010001681745052338, "learning_rate": 8.058185483477849e-06, "loss": 0.0002, "step": 7150 }, { "epoch": 5.435566521161511, "grad_norm": 0.00013484137889463454, "learning_rate": 8.051190907968077e-06, "loss": 0.0617, "step": 7160 }, { "epoch": 5.443158094515089, "grad_norm": 0.028125835582613945, "learning_rate": 8.044186805498033e-06, "loss": 0.0003, "step": 7170 }, { "epoch": 5.450749667868665, "grad_norm": 0.011845303699374199, "learning_rate": 8.037173197937149e-06, "loss": 0.0002, "step": 7180 }, { "epoch": 5.458341241222243, "grad_norm": 0.021918371319770813, "learning_rate": 8.030150107184535e-06, "loss": 0.0003, "step": 7190 }, { "epoch": 5.465932814575821, "grad_norm": 0.002744874684140086, "learning_rate": 8.023117555168907e-06, "loss": 0.0174, "step": 7200 }, { "epoch": 5.473524387929398, "grad_norm": 0.0008592222584411502, "learning_rate": 8.016075563848524e-06, "loss": 0.0001, "step": 7210 }, { "epoch": 5.481115961282976, "grad_norm": 0.0009818489197641611, "learning_rate": 8.009024155211125e-06, "loss": 0.0001, "step": 7220 }, { "epoch": 5.4887075346365535, "grad_norm": 0.0036790217272937298, "learning_rate": 8.001963351273843e-06, "loss": 0.0001, "step": 7230 }, { "epoch": 5.496299107990131, "grad_norm": 0.009668831713497639, "learning_rate": 7.994893174083151e-06, "loss": 0.0663, "step": 7240 }, { "epoch": 5.503890681343709, "grad_norm": 0.008087705820798874, "learning_rate": 7.98781364571479e-06, "loss": 0.0, "step": 7250 }, { "epoch": 5.511482254697286, "grad_norm": 0.001750052673742175, "learning_rate": 7.980724788273698e-06, "loss": 0.0001, "step": 7260 }, { "epoch": 5.519073828050864, "grad_norm": 0.0040147858671844006, "learning_rate": 7.973626623893942e-06, "loss": 0.1629, "step": 7270 }, { "epoch": 5.526665401404441, "grad_norm": 0.005076427478343248, "learning_rate": 7.96651917473865e-06, "loss": 0.0001, "step": 7280 }, { "epoch": 5.534256974758018, "grad_norm": 0.022049933671951294, "learning_rate": 7.959402462999934e-06, "loss": 0.0001, "step": 7290 }, { "epoch": 5.541848548111596, "grad_norm": 7.288018226623535, "learning_rate": 7.952276510898838e-06, "loss": 0.0612, "step": 7300 }, { "epoch": 5.549440121465174, "grad_norm": 0.48564571142196655, "learning_rate": 7.945141340685249e-06, "loss": 0.0001, "step": 7310 }, { "epoch": 5.557031694818751, "grad_norm": 0.0020839564967900515, "learning_rate": 7.937996974637839e-06, "loss": 0.0002, "step": 7320 }, { "epoch": 5.564623268172329, "grad_norm": 0.0012567265657708049, "learning_rate": 7.930843435063996e-06, "loss": 0.0003, "step": 7330 }, { "epoch": 5.5722148415259065, "grad_norm": 0.0036961582954972982, "learning_rate": 7.923680744299747e-06, "loss": 0.0876, "step": 7340 }, { "epoch": 5.579806414879484, "grad_norm": 31.300655364990234, "learning_rate": 7.916508924709693e-06, "loss": 0.151, "step": 7350 }, { "epoch": 5.587397988233061, "grad_norm": 0.008196866139769554, "learning_rate": 7.909327998686942e-06, "loss": 0.0001, "step": 7360 }, { "epoch": 5.5949895615866385, "grad_norm": 10.782143592834473, "learning_rate": 7.902137988653032e-06, "loss": 0.0539, "step": 7370 }, { "epoch": 5.602581134940216, "grad_norm": 0.004750726278871298, "learning_rate": 7.894938917057866e-06, "loss": 0.0385, "step": 7380 }, { "epoch": 5.610172708293794, "grad_norm": 0.04581161588430405, "learning_rate": 7.887730806379641e-06, "loss": 0.2684, "step": 7390 }, { "epoch": 5.617764281647371, "grad_norm": 0.026009181514382362, "learning_rate": 7.880513679124777e-06, "loss": 0.1283, "step": 7400 }, { "epoch": 5.625355855000949, "grad_norm": 2.0138673782348633, "learning_rate": 7.873287557827846e-06, "loss": 0.0004, "step": 7410 }, { "epoch": 5.632947428354527, "grad_norm": 0.14630401134490967, "learning_rate": 7.866052465051506e-06, "loss": 0.0503, "step": 7420 }, { "epoch": 5.640539001708104, "grad_norm": 0.0008778591873124242, "learning_rate": 7.858808423386422e-06, "loss": 0.0032, "step": 7430 }, { "epoch": 5.648130575061682, "grad_norm": 0.004400940611958504, "learning_rate": 7.851555455451208e-06, "loss": 0.0002, "step": 7440 }, { "epoch": 5.6557221484152596, "grad_norm": 0.002588229486718774, "learning_rate": 7.844293583892341e-06, "loss": 0.0055, "step": 7450 }, { "epoch": 5.663313721768836, "grad_norm": 0.0016362261958420277, "learning_rate": 7.837022831384107e-06, "loss": 0.0001, "step": 7460 }, { "epoch": 5.670905295122414, "grad_norm": 0.006628331728279591, "learning_rate": 7.829743220628515e-06, "loss": 0.0001, "step": 7470 }, { "epoch": 5.6784968684759916, "grad_norm": 0.0015720854280516505, "learning_rate": 7.822454774355233e-06, "loss": 0.1205, "step": 7480 }, { "epoch": 5.686088441829569, "grad_norm": 0.005687546916306019, "learning_rate": 7.815157515321521e-06, "loss": 0.1584, "step": 7490 }, { "epoch": 5.693680015183147, "grad_norm": 0.0018359271343797445, "learning_rate": 7.807851466312152e-06, "loss": 0.0833, "step": 7500 }, { "epoch": 5.701271588536724, "grad_norm": 0.004786277189850807, "learning_rate": 7.80053665013935e-06, "loss": 0.0004, "step": 7510 }, { "epoch": 5.708863161890302, "grad_norm": 0.14934459328651428, "learning_rate": 7.793213089642705e-06, "loss": 0.0678, "step": 7520 }, { "epoch": 5.716454735243879, "grad_norm": 0.002186194993555546, "learning_rate": 7.785880807689119e-06, "loss": 0.014, "step": 7530 }, { "epoch": 5.7240463085974564, "grad_norm": 0.007107855286449194, "learning_rate": 7.778539827172717e-06, "loss": 0.0021, "step": 7540 }, { "epoch": 5.731637881951034, "grad_norm": 0.00156366394367069, "learning_rate": 7.771190171014789e-06, "loss": 0.0299, "step": 7550 }, { "epoch": 5.739229455304612, "grad_norm": 0.006057819351553917, "learning_rate": 7.763831862163715e-06, "loss": 0.3021, "step": 7560 }, { "epoch": 5.746821028658189, "grad_norm": 0.1267128884792328, "learning_rate": 7.756464923594889e-06, "loss": 0.1477, "step": 7570 }, { "epoch": 5.754412602011767, "grad_norm": 0.003787196008488536, "learning_rate": 7.74908937831065e-06, "loss": 0.0012, "step": 7580 }, { "epoch": 5.762004175365345, "grad_norm": 0.004670240916311741, "learning_rate": 7.741705249340212e-06, "loss": 0.0001, "step": 7590 }, { "epoch": 5.769595748718922, "grad_norm": 0.0031925721559673548, "learning_rate": 7.734312559739591e-06, "loss": 0.1256, "step": 7600 }, { "epoch": 5.7771873220725, "grad_norm": 0.05346198379993439, "learning_rate": 7.726911332591533e-06, "loss": 0.0297, "step": 7610 }, { "epoch": 5.7847788954260775, "grad_norm": 9.102517127990723, "learning_rate": 7.719501591005435e-06, "loss": 0.0291, "step": 7620 }, { "epoch": 5.792370468779654, "grad_norm": 0.012199531309306622, "learning_rate": 7.71208335811729e-06, "loss": 0.0015, "step": 7630 }, { "epoch": 5.799962042133232, "grad_norm": 0.0010750379879027605, "learning_rate": 7.704656657089594e-06, "loss": 0.0002, "step": 7640 }, { "epoch": 5.8075536154868095, "grad_norm": 0.0029223288875073195, "learning_rate": 7.697221511111289e-06, "loss": 0.0404, "step": 7650 }, { "epoch": 5.815145188840387, "grad_norm": 0.030176958069205284, "learning_rate": 7.689777943397684e-06, "loss": 0.0002, "step": 7660 }, { "epoch": 5.822736762193965, "grad_norm": 0.01166499499231577, "learning_rate": 7.682325977190386e-06, "loss": 0.0381, "step": 7670 }, { "epoch": 5.830328335547542, "grad_norm": 32.26509475708008, "learning_rate": 7.674865635757219e-06, "loss": 0.0993, "step": 7680 }, { "epoch": 5.83791990890112, "grad_norm": 19.091943740844727, "learning_rate": 7.667396942392165e-06, "loss": 0.0492, "step": 7690 }, { "epoch": 5.845511482254698, "grad_norm": 0.01752518303692341, "learning_rate": 7.659919920415282e-06, "loss": 0.0053, "step": 7700 }, { "epoch": 5.853103055608274, "grad_norm": 0.0013000709004700184, "learning_rate": 7.652434593172629e-06, "loss": 0.2842, "step": 7710 }, { "epoch": 5.860694628961852, "grad_norm": 76.4178695678711, "learning_rate": 7.6449409840362e-06, "loss": 0.019, "step": 7720 }, { "epoch": 5.86828620231543, "grad_norm": 49.07400894165039, "learning_rate": 7.63743911640385e-06, "loss": 0.0412, "step": 7730 }, { "epoch": 5.875877775669007, "grad_norm": 0.018517136573791504, "learning_rate": 7.629929013699215e-06, "loss": 0.0113, "step": 7740 }, { "epoch": 5.883469349022585, "grad_norm": 0.0009308361331932247, "learning_rate": 7.622410699371651e-06, "loss": 0.0975, "step": 7750 }, { "epoch": 5.8910609223761625, "grad_norm": 0.002873294521123171, "learning_rate": 7.614884196896146e-06, "loss": 0.0001, "step": 7760 }, { "epoch": 5.89865249572974, "grad_norm": 0.5766377449035645, "learning_rate": 7.607349529773263e-06, "loss": 0.0894, "step": 7770 }, { "epoch": 5.906244069083318, "grad_norm": 0.33659154176712036, "learning_rate": 7.599806721529048e-06, "loss": 0.026, "step": 7780 }, { "epoch": 5.913835642436895, "grad_norm": 0.06800296902656555, "learning_rate": 7.592255795714978e-06, "loss": 0.001, "step": 7790 }, { "epoch": 5.921427215790473, "grad_norm": 0.010890863835811615, "learning_rate": 7.5846967759078646e-06, "loss": 0.0515, "step": 7800 }, { "epoch": 5.92901878914405, "grad_norm": 0.0007496042526327074, "learning_rate": 7.577129685709802e-06, "loss": 0.0196, "step": 7810 }, { "epoch": 5.936610362497627, "grad_norm": 0.12547799944877625, "learning_rate": 7.569554548748076e-06, "loss": 0.0212, "step": 7820 }, { "epoch": 5.944201935851205, "grad_norm": 0.000410243752412498, "learning_rate": 7.561971388675101e-06, "loss": 0.0001, "step": 7830 }, { "epoch": 5.951793509204783, "grad_norm": 0.0626864954829216, "learning_rate": 7.554380229168341e-06, "loss": 0.1047, "step": 7840 }, { "epoch": 5.95938508255836, "grad_norm": 0.0048113660886883736, "learning_rate": 7.546781093930238e-06, "loss": 0.0166, "step": 7850 }, { "epoch": 5.966976655911938, "grad_norm": 0.04934828728437424, "learning_rate": 7.539174006688137e-06, "loss": 0.1765, "step": 7860 }, { "epoch": 5.974568229265516, "grad_norm": 3.118401527404785, "learning_rate": 7.531558991194214e-06, "loss": 0.0369, "step": 7870 }, { "epoch": 5.982159802619093, "grad_norm": 33.45072937011719, "learning_rate": 7.523936071225395e-06, "loss": 0.1186, "step": 7880 }, { "epoch": 5.98975137597267, "grad_norm": 0.09529292583465576, "learning_rate": 7.516305270583291e-06, "loss": 0.0382, "step": 7890 }, { "epoch": 5.997342949326248, "grad_norm": 0.09993643313646317, "learning_rate": 7.50866661309412e-06, "loss": 0.1966, "step": 7900 }, { "epoch": 5.999620421332321, "eval_f1": 0.9453778934602862, "eval_loss": 0.17724575102329254, "eval_precision": 0.9455308702748206, "eval_recall": 0.9453717754172989, "eval_runtime": 75.7194, "eval_samples_per_second": 17.406, "eval_steps_per_second": 17.406, "step": 7903 }, { "epoch": 6.004934522679825, "grad_norm": 0.5747145414352417, "learning_rate": 7.5010201226086285e-06, "loss": 0.0792, "step": 7910 }, { "epoch": 6.012526096033403, "grad_norm": 15.592010498046875, "learning_rate": 7.493365823002023e-06, "loss": 0.066, "step": 7920 }, { "epoch": 6.0201176693869805, "grad_norm": 0.002133031841367483, "learning_rate": 7.4857037381738924e-06, "loss": 0.0001, "step": 7930 }, { "epoch": 6.027709242740558, "grad_norm": 0.006577716208994389, "learning_rate": 7.478033892048134e-06, "loss": 0.0005, "step": 7940 }, { "epoch": 6.035300816094136, "grad_norm": 0.0061035482212901115, "learning_rate": 7.470356308572879e-06, "loss": 0.0, "step": 7950 }, { "epoch": 6.042892389447713, "grad_norm": 0.0037885792553424835, "learning_rate": 7.462671011720417e-06, "loss": 0.0001, "step": 7960 }, { "epoch": 6.050483962801291, "grad_norm": 0.010262789204716682, "learning_rate": 7.454978025487121e-06, "loss": 0.0007, "step": 7970 }, { "epoch": 6.058075536154868, "grad_norm": 0.0021226617973297834, "learning_rate": 7.447277373893373e-06, "loss": 0.0386, "step": 7980 }, { "epoch": 6.065667109508445, "grad_norm": 0.00850209966301918, "learning_rate": 7.439569080983493e-06, "loss": 0.0008, "step": 7990 }, { "epoch": 6.073258682862023, "grad_norm": 0.004618831444531679, "learning_rate": 7.431853170825658e-06, "loss": 0.0, "step": 8000 }, { "epoch": 6.080850256215601, "grad_norm": 0.0010309051722288132, "learning_rate": 7.424129667511824e-06, "loss": 0.0174, "step": 8010 }, { "epoch": 6.088441829569178, "grad_norm": 0.005731165409088135, "learning_rate": 7.4163985951576616e-06, "loss": 0.0099, "step": 8020 }, { "epoch": 6.096033402922756, "grad_norm": 2.437437057495117, "learning_rate": 7.408659977902474e-06, "loss": 0.159, "step": 8030 }, { "epoch": 6.1036249762763335, "grad_norm": 0.008021681569516659, "learning_rate": 7.400913839909119e-06, "loss": 0.0002, "step": 8040 }, { "epoch": 6.111216549629911, "grad_norm": 0.0012970505049452186, "learning_rate": 7.3931602053639414e-06, "loss": 0.0527, "step": 8050 }, { "epoch": 6.118808122983489, "grad_norm": 0.031485993415117264, "learning_rate": 7.385399098476691e-06, "loss": 0.0416, "step": 8060 }, { "epoch": 6.1263996963370655, "grad_norm": 0.037826113402843475, "learning_rate": 7.377630543480447e-06, "loss": 0.0064, "step": 8070 }, { "epoch": 6.133991269690643, "grad_norm": 0.007939423434436321, "learning_rate": 7.369854564631549e-06, "loss": 0.0004, "step": 8080 }, { "epoch": 6.141582843044221, "grad_norm": 0.011576803401112556, "learning_rate": 7.3620711862095116e-06, "loss": 0.0003, "step": 8090 }, { "epoch": 6.149174416397798, "grad_norm": 0.01118936575949192, "learning_rate": 7.354280432516957e-06, "loss": 0.0002, "step": 8100 }, { "epoch": 6.156765989751376, "grad_norm": 0.001931383740156889, "learning_rate": 7.346482327879535e-06, "loss": 0.0009, "step": 8110 }, { "epoch": 6.164357563104954, "grad_norm": 0.005506934132426977, "learning_rate": 7.338676896645848e-06, "loss": 0.0567, "step": 8120 }, { "epoch": 6.171949136458531, "grad_norm": 0.07792196422815323, "learning_rate": 7.330864163187372e-06, "loss": 0.0003, "step": 8130 }, { "epoch": 6.179540709812109, "grad_norm": 0.06636549532413483, "learning_rate": 7.323044151898388e-06, "loss": 0.0658, "step": 8140 }, { "epoch": 6.1871322831656865, "grad_norm": 0.0012724515981972218, "learning_rate": 7.3152168871959e-06, "loss": 0.0605, "step": 8150 }, { "epoch": 6.194723856519263, "grad_norm": 0.0033073413651436567, "learning_rate": 7.307382393519556e-06, "loss": 0.0732, "step": 8160 }, { "epoch": 6.202315429872841, "grad_norm": 0.00361923361197114, "learning_rate": 7.299540695331579e-06, "loss": 0.0054, "step": 8170 }, { "epoch": 6.2099070032264185, "grad_norm": 0.0007601641118526459, "learning_rate": 7.291691817116686e-06, "loss": 0.0001, "step": 8180 }, { "epoch": 6.217498576579996, "grad_norm": 0.0025373934768140316, "learning_rate": 7.283835783382015e-06, "loss": 0.0567, "step": 8190 }, { "epoch": 6.225090149933574, "grad_norm": 0.0037624204996973276, "learning_rate": 7.275972618657041e-06, "loss": 0.0001, "step": 8200 }, { "epoch": 6.232681723287151, "grad_norm": 0.002659817226231098, "learning_rate": 7.268102347493511e-06, "loss": 0.0727, "step": 8210 }, { "epoch": 6.240273296640729, "grad_norm": 0.08516960591077805, "learning_rate": 7.260224994465357e-06, "loss": 0.001, "step": 8220 }, { "epoch": 6.247864869994307, "grad_norm": 0.03827419877052307, "learning_rate": 7.252340584168624e-06, "loss": 0.0023, "step": 8230 }, { "epoch": 6.255456443347883, "grad_norm": 0.0027726832777261734, "learning_rate": 7.2444491412213914e-06, "loss": 0.0536, "step": 8240 }, { "epoch": 6.263048016701461, "grad_norm": 0.0064014289528131485, "learning_rate": 7.236550690263702e-06, "loss": 0.001, "step": 8250 }, { "epoch": 6.270639590055039, "grad_norm": 0.005650675855576992, "learning_rate": 7.228645255957472e-06, "loss": 0.2206, "step": 8260 }, { "epoch": 6.278231163408616, "grad_norm": 21.262990951538086, "learning_rate": 7.2207328629864285e-06, "loss": 0.0884, "step": 8270 }, { "epoch": 6.285822736762194, "grad_norm": 0.03092315047979355, "learning_rate": 7.212813536056025e-06, "loss": 0.0684, "step": 8280 }, { "epoch": 6.293414310115772, "grad_norm": 0.00995034258812666, "learning_rate": 7.2048872998933665e-06, "loss": 0.0003, "step": 8290 }, { "epoch": 6.301005883469349, "grad_norm": 0.08173485100269318, "learning_rate": 7.196954179247127e-06, "loss": 0.0699, "step": 8300 }, { "epoch": 6.308597456822927, "grad_norm": 0.15706369280815125, "learning_rate": 7.189014198887478e-06, "loss": 0.0419, "step": 8310 }, { "epoch": 6.3161890301765045, "grad_norm": 0.44603389501571655, "learning_rate": 7.181067383606015e-06, "loss": 0.0374, "step": 8320 }, { "epoch": 6.323780603530081, "grad_norm": 89.45038604736328, "learning_rate": 7.173113758215667e-06, "loss": 0.0231, "step": 8330 }, { "epoch": 6.331372176883659, "grad_norm": 0.07431600242853165, "learning_rate": 7.165153347550631e-06, "loss": 0.007, "step": 8340 }, { "epoch": 6.3389637502372365, "grad_norm": 0.00812879391014576, "learning_rate": 7.15718617646629e-06, "loss": 0.1122, "step": 8350 }, { "epoch": 6.346555323590814, "grad_norm": 0.4049533009529114, "learning_rate": 7.149212269839132e-06, "loss": 0.0532, "step": 8360 }, { "epoch": 6.354146896944392, "grad_norm": 0.403401255607605, "learning_rate": 7.141231652566681e-06, "loss": 0.0008, "step": 8370 }, { "epoch": 6.361738470297969, "grad_norm": 0.8025851249694824, "learning_rate": 7.133244349567411e-06, "loss": 0.0221, "step": 8380 }, { "epoch": 6.369330043651547, "grad_norm": 0.06498798727989197, "learning_rate": 7.125250385780673e-06, "loss": 0.0621, "step": 8390 }, { "epoch": 6.376921617005125, "grad_norm": 0.0010519091738387942, "learning_rate": 7.1172497861666124e-06, "loss": 0.0404, "step": 8400 }, { "epoch": 6.384513190358702, "grad_norm": 0.01423695683479309, "learning_rate": 7.109242575706099e-06, "loss": 0.0314, "step": 8410 }, { "epoch": 6.392104763712279, "grad_norm": 0.8802148103713989, "learning_rate": 7.10122877940064e-06, "loss": 0.013, "step": 8420 }, { "epoch": 6.399696337065857, "grad_norm": 0.037081677466630936, "learning_rate": 7.093208422272309e-06, "loss": 0.0005, "step": 8430 }, { "epoch": 6.407287910419434, "grad_norm": 0.0005525704473257065, "learning_rate": 7.085181529363661e-06, "loss": 0.0972, "step": 8440 }, { "epoch": 6.414879483773012, "grad_norm": 0.018398938700556755, "learning_rate": 7.077148125737661e-06, "loss": 0.1108, "step": 8450 }, { "epoch": 6.4224710571265895, "grad_norm": 0.040173228830099106, "learning_rate": 7.069108236477604e-06, "loss": 0.0002, "step": 8460 }, { "epoch": 6.430062630480167, "grad_norm": 0.009616430848836899, "learning_rate": 7.061061886687035e-06, "loss": 0.0013, "step": 8470 }, { "epoch": 6.437654203833745, "grad_norm": 78.41429901123047, "learning_rate": 7.053009101489667e-06, "loss": 0.1232, "step": 8480 }, { "epoch": 6.445245777187322, "grad_norm": 0.000696105882525444, "learning_rate": 7.044949906029314e-06, "loss": 0.0066, "step": 8490 }, { "epoch": 6.4528373505409, "grad_norm": 0.012759624980390072, "learning_rate": 7.036884325469797e-06, "loss": 0.205, "step": 8500 }, { "epoch": 6.460428923894477, "grad_norm": 51.840309143066406, "learning_rate": 7.028812384994883e-06, "loss": 0.1227, "step": 8510 }, { "epoch": 6.468020497248054, "grad_norm": 0.0019890512339770794, "learning_rate": 7.0207341098081875e-06, "loss": 0.1419, "step": 8520 }, { "epoch": 6.475612070601632, "grad_norm": 0.003854219801723957, "learning_rate": 7.012649525133112e-06, "loss": 0.0714, "step": 8530 }, { "epoch": 6.48320364395521, "grad_norm": 0.06946977972984314, "learning_rate": 7.004558656212754e-06, "loss": 0.0004, "step": 8540 }, { "epoch": 6.490795217308787, "grad_norm": 0.003731220494955778, "learning_rate": 6.9964615283098405e-06, "loss": 0.0017, "step": 8550 }, { "epoch": 6.498386790662365, "grad_norm": 0.002791723469272256, "learning_rate": 6.988358166706631e-06, "loss": 0.0403, "step": 8560 }, { "epoch": 6.5059783640159425, "grad_norm": 4.053121089935303, "learning_rate": 6.980248596704856e-06, "loss": 0.0008, "step": 8570 }, { "epoch": 6.51356993736952, "grad_norm": 0.0038540286477655172, "learning_rate": 6.97213284362563e-06, "loss": 0.0003, "step": 8580 }, { "epoch": 6.521161510723097, "grad_norm": 0.0033889245241880417, "learning_rate": 6.96401093280937e-06, "loss": 0.0505, "step": 8590 }, { "epoch": 6.5287530840766745, "grad_norm": 0.0008385963155888021, "learning_rate": 6.9558828896157225e-06, "loss": 0.0001, "step": 8600 }, { "epoch": 6.536344657430252, "grad_norm": 0.05049284175038338, "learning_rate": 6.947748739423483e-06, "loss": 0.0776, "step": 8610 }, { "epoch": 6.54393623078383, "grad_norm": 0.014165320433676243, "learning_rate": 6.939608507630513e-06, "loss": 0.0339, "step": 8620 }, { "epoch": 6.551527804137407, "grad_norm": 24.47572898864746, "learning_rate": 6.931462219653662e-06, "loss": 0.1604, "step": 8630 }, { "epoch": 6.559119377490985, "grad_norm": 0.07809809595346451, "learning_rate": 6.923309900928693e-06, "loss": 0.0003, "step": 8640 }, { "epoch": 6.566710950844563, "grad_norm": 0.08131968230009079, "learning_rate": 6.915151576910194e-06, "loss": 0.0097, "step": 8650 }, { "epoch": 6.57430252419814, "grad_norm": 106.42731475830078, "learning_rate": 6.906987273071509e-06, "loss": 0.0111, "step": 8660 }, { "epoch": 6.581894097551718, "grad_norm": 0.0046349032782018185, "learning_rate": 6.898817014904653e-06, "loss": 0.0601, "step": 8670 }, { "epoch": 6.589485670905296, "grad_norm": 0.00192779372446239, "learning_rate": 6.890640827920226e-06, "loss": 0.0349, "step": 8680 }, { "epoch": 6.597077244258872, "grad_norm": 0.0012624857481569052, "learning_rate": 6.882458737647346e-06, "loss": 0.0009, "step": 8690 }, { "epoch": 6.60466881761245, "grad_norm": 0.00019073448493145406, "learning_rate": 6.874270769633564e-06, "loss": 0.0001, "step": 8700 }, { "epoch": 6.612260390966028, "grad_norm": 0.03901955857872963, "learning_rate": 6.866076949444781e-06, "loss": 0.238, "step": 8710 }, { "epoch": 6.619851964319605, "grad_norm": 0.05632855370640755, "learning_rate": 6.857877302665169e-06, "loss": 0.0435, "step": 8720 }, { "epoch": 6.627443537673183, "grad_norm": 0.01720161736011505, "learning_rate": 6.8496718548970956e-06, "loss": 0.0208, "step": 8730 }, { "epoch": 6.6350351110267605, "grad_norm": 0.00398442754521966, "learning_rate": 6.8414606317610435e-06, "loss": 0.0012, "step": 8740 }, { "epoch": 6.642626684380338, "grad_norm": 0.02426181733608246, "learning_rate": 6.833243658895521e-06, "loss": 0.0004, "step": 8750 }, { "epoch": 6.650218257733916, "grad_norm": 14.350150108337402, "learning_rate": 6.825020961956995e-06, "loss": 0.0823, "step": 8760 }, { "epoch": 6.6578098310874925, "grad_norm": 0.0016744782915338874, "learning_rate": 6.816792566619805e-06, "loss": 0.1436, "step": 8770 }, { "epoch": 6.66540140444107, "grad_norm": 0.020618196576833725, "learning_rate": 6.808558498576081e-06, "loss": 0.0006, "step": 8780 }, { "epoch": 6.672992977794648, "grad_norm": 0.13271041214466095, "learning_rate": 6.800318783535665e-06, "loss": 0.0074, "step": 8790 }, { "epoch": 6.680584551148225, "grad_norm": 0.020608441904187202, "learning_rate": 6.792073447226034e-06, "loss": 0.0002, "step": 8800 }, { "epoch": 6.688176124501803, "grad_norm": 0.0014845712576061487, "learning_rate": 6.7838225153922125e-06, "loss": 0.0004, "step": 8810 }, { "epoch": 6.695767697855381, "grad_norm": 0.06566622108221054, "learning_rate": 6.775566013796699e-06, "loss": 0.055, "step": 8820 }, { "epoch": 6.703359271208958, "grad_norm": 0.13233526051044464, "learning_rate": 6.767303968219383e-06, "loss": 0.051, "step": 8830 }, { "epoch": 6.710950844562536, "grad_norm": 12.247241020202637, "learning_rate": 6.759036404457465e-06, "loss": 0.171, "step": 8840 }, { "epoch": 6.7185424179161135, "grad_norm": 0.06808517873287201, "learning_rate": 6.750763348325371e-06, "loss": 0.1818, "step": 8850 }, { "epoch": 6.726133991269691, "grad_norm": 0.011621583253145218, "learning_rate": 6.7424848256546825e-06, "loss": 0.0119, "step": 8860 }, { "epoch": 6.733725564623268, "grad_norm": 22.450834274291992, "learning_rate": 6.734200862294045e-06, "loss": 0.176, "step": 8870 }, { "epoch": 6.7413171379768455, "grad_norm": 11.976455688476562, "learning_rate": 6.725911484109094e-06, "loss": 0.0507, "step": 8880 }, { "epoch": 6.748908711330423, "grad_norm": 0.042554713785648346, "learning_rate": 6.717616716982369e-06, "loss": 0.0004, "step": 8890 }, { "epoch": 6.756500284684001, "grad_norm": 0.0029066246934235096, "learning_rate": 6.7093165868132415e-06, "loss": 0.0066, "step": 8900 }, { "epoch": 6.764091858037578, "grad_norm": 0.31371551752090454, "learning_rate": 6.701011119517824e-06, "loss": 0.0311, "step": 8910 }, { "epoch": 6.771683431391156, "grad_norm": 0.025408325716853142, "learning_rate": 6.692700341028893e-06, "loss": 0.0002, "step": 8920 }, { "epoch": 6.779275004744734, "grad_norm": 0.6896237730979919, "learning_rate": 6.684384277295813e-06, "loss": 0.003, "step": 8930 }, { "epoch": 6.78686657809831, "grad_norm": 0.0014387418050318956, "learning_rate": 6.676062954284447e-06, "loss": 0.1432, "step": 8940 }, { "epoch": 6.794458151451888, "grad_norm": 0.012326021678745747, "learning_rate": 6.667736397977079e-06, "loss": 0.0131, "step": 8950 }, { "epoch": 6.802049724805466, "grad_norm": 0.010481426492333412, "learning_rate": 6.659404634372338e-06, "loss": 0.0027, "step": 8960 }, { "epoch": 6.809641298159043, "grad_norm": 0.11520393937826157, "learning_rate": 6.6510676894851065e-06, "loss": 0.0008, "step": 8970 }, { "epoch": 6.817232871512621, "grad_norm": 14.105742454528809, "learning_rate": 6.6427255893464495e-06, "loss": 0.1792, "step": 8980 }, { "epoch": 6.8248244448661985, "grad_norm": 0.012812143191695213, "learning_rate": 6.634378360003525e-06, "loss": 0.0001, "step": 8990 }, { "epoch": 6.832416018219776, "grad_norm": 0.0041709113866090775, "learning_rate": 6.62602602751951e-06, "loss": 0.0001, "step": 9000 }, { "epoch": 6.840007591573354, "grad_norm": 0.0038161997217684984, "learning_rate": 6.6176686179735095e-06, "loss": 0.0665, "step": 9010 }, { "epoch": 6.847599164926931, "grad_norm": 0.30405986309051514, "learning_rate": 6.6093061574604875e-06, "loss": 0.0624, "step": 9020 }, { "epoch": 6.855190738280509, "grad_norm": 0.001419481704942882, "learning_rate": 6.600938672091178e-06, "loss": 0.0001, "step": 9030 }, { "epoch": 6.862782311634086, "grad_norm": 0.005425265524536371, "learning_rate": 6.592566187992e-06, "loss": 0.0115, "step": 9040 }, { "epoch": 6.870373884987663, "grad_norm": 0.009964833967387676, "learning_rate": 6.584188731304984e-06, "loss": 0.0001, "step": 9050 }, { "epoch": 6.877965458341241, "grad_norm": 17.450939178466797, "learning_rate": 6.575806328187684e-06, "loss": 0.0065, "step": 9060 }, { "epoch": 6.885557031694819, "grad_norm": 0.5963069796562195, "learning_rate": 6.567419004813105e-06, "loss": 0.162, "step": 9070 }, { "epoch": 6.893148605048396, "grad_norm": 0.002563629997894168, "learning_rate": 6.559026787369608e-06, "loss": 0.0006, "step": 9080 }, { "epoch": 6.900740178401974, "grad_norm": 0.0032906217966228724, "learning_rate": 6.550629702060836e-06, "loss": 0.0576, "step": 9090 }, { "epoch": 6.908331751755552, "grad_norm": 0.00252812379039824, "learning_rate": 6.542227775105636e-06, "loss": 0.0003, "step": 9100 }, { "epoch": 6.915923325109129, "grad_norm": 0.13027949631214142, "learning_rate": 6.533821032737968e-06, "loss": 0.1393, "step": 9110 }, { "epoch": 6.923514898462706, "grad_norm": 0.0013868529349565506, "learning_rate": 6.525409501206828e-06, "loss": 0.0003, "step": 9120 }, { "epoch": 6.931106471816284, "grad_norm": 0.0035531616304069757, "learning_rate": 6.516993206776167e-06, "loss": 0.0516, "step": 9130 }, { "epoch": 6.938698045169861, "grad_norm": 0.02282761037349701, "learning_rate": 6.508572175724809e-06, "loss": 0.0811, "step": 9140 }, { "epoch": 6.946289618523439, "grad_norm": 29.90252685546875, "learning_rate": 6.500146434346363e-06, "loss": 0.065, "step": 9150 }, { "epoch": 6.9538811918770165, "grad_norm": 0.14673539996147156, "learning_rate": 6.4917160089491475e-06, "loss": 0.0004, "step": 9160 }, { "epoch": 6.961472765230594, "grad_norm": 2.630889892578125, "learning_rate": 6.483280925856108e-06, "loss": 0.039, "step": 9170 }, { "epoch": 6.969064338584172, "grad_norm": 0.005536849144846201, "learning_rate": 6.474841211404732e-06, "loss": 0.0212, "step": 9180 }, { "epoch": 6.976655911937749, "grad_norm": 13.343396186828613, "learning_rate": 6.466396891946967e-06, "loss": 0.1344, "step": 9190 }, { "epoch": 6.984247485291327, "grad_norm": 13.473750114440918, "learning_rate": 6.457947993849138e-06, "loss": 0.0461, "step": 9200 }, { "epoch": 6.991839058644905, "grad_norm": 0.10873476415872574, "learning_rate": 6.4494945434918695e-06, "loss": 0.0008, "step": 9210 }, { "epoch": 6.999430631998481, "grad_norm": 0.08516258746385574, "learning_rate": 6.441036567269999e-06, "loss": 0.0006, "step": 9220 }, { "epoch": 6.999430631998481, "eval_f1": 0.9574758853469025, "eval_loss": 0.15473049879074097, "eval_precision": 0.9565893515212521, "eval_recall": 0.9575113808801214, "eval_runtime": 75.8291, "eval_samples_per_second": 17.381, "eval_steps_per_second": 17.381, "step": 9220 }, { "epoch": 7.007022205352059, "grad_norm": 0.4523492455482483, "learning_rate": 6.432574091592495e-06, "loss": 0.0557, "step": 9230 }, { "epoch": 7.014613778705637, "grad_norm": 0.03357968479394913, "learning_rate": 6.424107142882371e-06, "loss": 0.0008, "step": 9240 }, { "epoch": 7.022205352059214, "grad_norm": 46.4831657409668, "learning_rate": 6.415635747576613e-06, "loss": 0.0078, "step": 9250 }, { "epoch": 7.029796925412792, "grad_norm": 0.010737122967839241, "learning_rate": 6.40715993212609e-06, "loss": 0.0003, "step": 9260 }, { "epoch": 7.0373884987663695, "grad_norm": 21.10315704345703, "learning_rate": 6.398679722995468e-06, "loss": 0.1309, "step": 9270 }, { "epoch": 7.044980072119947, "grad_norm": 0.010574131272733212, "learning_rate": 6.3901951466631355e-06, "loss": 0.0138, "step": 9280 }, { "epoch": 7.052571645473525, "grad_norm": 0.0182713121175766, "learning_rate": 6.381706229621117e-06, "loss": 0.0002, "step": 9290 }, { "epoch": 7.0601632188271015, "grad_norm": 0.10783802717924118, "learning_rate": 6.373212998374989e-06, "loss": 0.0337, "step": 9300 }, { "epoch": 7.067754792180679, "grad_norm": 0.001446128822863102, "learning_rate": 6.364715479443798e-06, "loss": 0.0007, "step": 9310 }, { "epoch": 7.075346365534257, "grad_norm": 0.00694943917915225, "learning_rate": 6.356213699359982e-06, "loss": 0.071, "step": 9320 }, { "epoch": 7.082937938887834, "grad_norm": 0.09859494864940643, "learning_rate": 6.347707684669278e-06, "loss": 0.0005, "step": 9330 }, { "epoch": 7.090529512241412, "grad_norm": 0.0008273068233393133, "learning_rate": 6.33919746193065e-06, "loss": 0.0, "step": 9340 }, { "epoch": 7.09812108559499, "grad_norm": 0.0038316529244184494, "learning_rate": 6.330683057716198e-06, "loss": 0.0002, "step": 9350 }, { "epoch": 7.105712658948567, "grad_norm": 0.0030708136036992073, "learning_rate": 6.322164498611081e-06, "loss": 0.0444, "step": 9360 }, { "epoch": 7.113304232302145, "grad_norm": 0.0017414516769349575, "learning_rate": 6.313641811213429e-06, "loss": 0.0001, "step": 9370 }, { "epoch": 7.1208958056557226, "grad_norm": 0.0035761166363954544, "learning_rate": 6.305115022134262e-06, "loss": 0.0001, "step": 9380 }, { "epoch": 7.128487379009299, "grad_norm": 0.006457789335399866, "learning_rate": 6.296584157997408e-06, "loss": 0.0, "step": 9390 }, { "epoch": 7.136078952362877, "grad_norm": 0.002314153825864196, "learning_rate": 6.288049245439419e-06, "loss": 0.0, "step": 9400 }, { "epoch": 7.1436705257164546, "grad_norm": 0.008694717660546303, "learning_rate": 6.279510311109487e-06, "loss": 0.0001, "step": 9410 }, { "epoch": 7.151262099070032, "grad_norm": 0.0009509180672466755, "learning_rate": 6.270967381669362e-06, "loss": 0.0001, "step": 9420 }, { "epoch": 7.15885367242361, "grad_norm": 0.009006676264107227, "learning_rate": 6.262420483793267e-06, "loss": 0.0605, "step": 9430 }, { "epoch": 7.1664452457771874, "grad_norm": 0.048271872103214264, "learning_rate": 6.253869644167816e-06, "loss": 0.3191, "step": 9440 }, { "epoch": 7.174036819130765, "grad_norm": 0.001320886891335249, "learning_rate": 6.245314889491933e-06, "loss": 0.0066, "step": 9450 }, { "epoch": 7.181628392484343, "grad_norm": 0.4799332916736603, "learning_rate": 6.236756246476765e-06, "loss": 0.0261, "step": 9460 }, { "epoch": 7.18921996583792, "grad_norm": 1.649972677230835, "learning_rate": 6.228193741845598e-06, "loss": 0.001, "step": 9470 }, { "epoch": 7.196811539191497, "grad_norm": 0.001544089405797422, "learning_rate": 6.219627402333779e-06, "loss": 0.0001, "step": 9480 }, { "epoch": 7.204403112545075, "grad_norm": 0.0058356523513793945, "learning_rate": 6.211057254688625e-06, "loss": 0.0005, "step": 9490 }, { "epoch": 7.211994685898652, "grad_norm": 0.001541537931188941, "learning_rate": 6.202483325669345e-06, "loss": 0.0001, "step": 9500 }, { "epoch": 7.21958625925223, "grad_norm": 0.002176716923713684, "learning_rate": 6.193905642046957e-06, "loss": 0.0472, "step": 9510 }, { "epoch": 7.227177832605808, "grad_norm": 1.9937260150909424, "learning_rate": 6.1853242306041995e-06, "loss": 0.1573, "step": 9520 }, { "epoch": 7.234769405959385, "grad_norm": 0.005575578194111586, "learning_rate": 6.176739118135451e-06, "loss": 0.004, "step": 9530 }, { "epoch": 7.242360979312963, "grad_norm": 0.000481792347272858, "learning_rate": 6.168150331446647e-06, "loss": 0.0001, "step": 9540 }, { "epoch": 7.2499525526665405, "grad_norm": 17.994680404663086, "learning_rate": 6.159557897355198e-06, "loss": 0.1026, "step": 9550 }, { "epoch": 7.257544126020118, "grad_norm": 0.002096704440191388, "learning_rate": 6.1509618426898934e-06, "loss": 0.0004, "step": 9560 }, { "epoch": 7.265135699373695, "grad_norm": 0.8841345906257629, "learning_rate": 6.142362194290839e-06, "loss": 0.0017, "step": 9570 }, { "epoch": 7.2727272727272725, "grad_norm": 0.0021465634927153587, "learning_rate": 6.133758979009355e-06, "loss": 0.0001, "step": 9580 }, { "epoch": 7.28031884608085, "grad_norm": 0.000766513985581696, "learning_rate": 6.1251522237078996e-06, "loss": 0.2186, "step": 9590 }, { "epoch": 7.287910419434428, "grad_norm": 0.01812721975147724, "learning_rate": 6.116541955259986e-06, "loss": 0.0007, "step": 9600 }, { "epoch": 7.295501992788005, "grad_norm": 0.00034479115856811404, "learning_rate": 6.1079282005500965e-06, "loss": 0.0055, "step": 9610 }, { "epoch": 7.303093566141583, "grad_norm": 0.0008322893991135061, "learning_rate": 6.099310986473595e-06, "loss": 0.1915, "step": 9620 }, { "epoch": 7.310685139495161, "grad_norm": 0.001017669215798378, "learning_rate": 6.090690339936651e-06, "loss": 0.0001, "step": 9630 }, { "epoch": 7.318276712848738, "grad_norm": 0.003790239803493023, "learning_rate": 6.082066287856152e-06, "loss": 0.0001, "step": 9640 }, { "epoch": 7.325868286202315, "grad_norm": 0.001801560982130468, "learning_rate": 6.073438857159617e-06, "loss": 0.0404, "step": 9650 }, { "epoch": 7.333459859555893, "grad_norm": 0.0027911756187677383, "learning_rate": 6.064808074785112e-06, "loss": 0.0215, "step": 9660 }, { "epoch": 7.34105143290947, "grad_norm": 0.001065615564584732, "learning_rate": 6.056173967681172e-06, "loss": 0.0, "step": 9670 }, { "epoch": 7.348643006263048, "grad_norm": 0.0008436132338829339, "learning_rate": 6.047536562806712e-06, "loss": 0.0001, "step": 9680 }, { "epoch": 7.3562345796166255, "grad_norm": 0.8050636053085327, "learning_rate": 6.038895887130942e-06, "loss": 0.068, "step": 9690 }, { "epoch": 7.363826152970203, "grad_norm": 0.011237557046115398, "learning_rate": 6.030251967633288e-06, "loss": 0.0001, "step": 9700 }, { "epoch": 7.371417726323781, "grad_norm": 0.0008242133189924061, "learning_rate": 6.021604831303303e-06, "loss": 0.0963, "step": 9710 }, { "epoch": 7.379009299677358, "grad_norm": 0.9633244276046753, "learning_rate": 6.012954505140582e-06, "loss": 0.0032, "step": 9720 }, { "epoch": 7.386600873030936, "grad_norm": 33.064613342285156, "learning_rate": 6.004301016154683e-06, "loss": 0.0926, "step": 9730 }, { "epoch": 7.394192446384513, "grad_norm": 0.010244650766253471, "learning_rate": 5.995644391365038e-06, "loss": 0.0, "step": 9740 }, { "epoch": 7.40178401973809, "grad_norm": 0.0010498914634808898, "learning_rate": 5.98698465780087e-06, "loss": 0.2515, "step": 9750 }, { "epoch": 7.409375593091668, "grad_norm": 0.005540487356483936, "learning_rate": 5.978321842501108e-06, "loss": 0.0001, "step": 9760 }, { "epoch": 7.416967166445246, "grad_norm": 0.001410833327099681, "learning_rate": 5.9696559725143054e-06, "loss": 0.0024, "step": 9770 }, { "epoch": 7.424558739798823, "grad_norm": 0.11642355471849442, "learning_rate": 5.960987074898553e-06, "loss": 0.0004, "step": 9780 }, { "epoch": 7.432150313152401, "grad_norm": 0.029217828065156937, "learning_rate": 5.952315176721395e-06, "loss": 0.0002, "step": 9790 }, { "epoch": 7.439741886505979, "grad_norm": 0.057612184435129166, "learning_rate": 5.943640305059742e-06, "loss": 0.0455, "step": 9800 }, { "epoch": 7.447333459859556, "grad_norm": 30.20539665222168, "learning_rate": 5.9349624869997915e-06, "loss": 0.152, "step": 9810 }, { "epoch": 7.454925033213134, "grad_norm": 0.011167285032570362, "learning_rate": 5.926281749636941e-06, "loss": 0.0013, "step": 9820 }, { "epoch": 7.462516606566711, "grad_norm": 0.01445252075791359, "learning_rate": 5.9175981200757026e-06, "loss": 0.0275, "step": 9830 }, { "epoch": 7.470108179920288, "grad_norm": 0.0006470708176493645, "learning_rate": 5.908911625429617e-06, "loss": 0.0004, "step": 9840 }, { "epoch": 7.477699753273866, "grad_norm": 0.010150356218218803, "learning_rate": 5.900222292821173e-06, "loss": 0.0572, "step": 9850 }, { "epoch": 7.4852913266274435, "grad_norm": 0.05601394549012184, "learning_rate": 5.89153014938172e-06, "loss": 0.0004, "step": 9860 }, { "epoch": 7.492882899981021, "grad_norm": 0.007213375996798277, "learning_rate": 5.8828352222513866e-06, "loss": 0.0184, "step": 9870 }, { "epoch": 7.500474473334599, "grad_norm": 0.005943207535892725, "learning_rate": 5.874137538578984e-06, "loss": 0.0519, "step": 9880 }, { "epoch": 7.508066046688176, "grad_norm": 0.005052383989095688, "learning_rate": 5.865437125521943e-06, "loss": 0.091, "step": 9890 }, { "epoch": 7.515657620041754, "grad_norm": 0.000759047397878021, "learning_rate": 5.856734010246207e-06, "loss": 0.0, "step": 9900 }, { "epoch": 7.523249193395332, "grad_norm": 0.004873152356594801, "learning_rate": 5.848028219926162e-06, "loss": 0.0001, "step": 9910 }, { "epoch": 7.530840766748908, "grad_norm": 0.0005250478279776871, "learning_rate": 5.839319781744543e-06, "loss": 0.0, "step": 9920 }, { "epoch": 7.538432340102486, "grad_norm": 0.0007055936730466783, "learning_rate": 5.830608722892352e-06, "loss": 0.0001, "step": 9930 }, { "epoch": 7.546023913456064, "grad_norm": 0.0024068867787718773, "learning_rate": 5.821895070568781e-06, "loss": 0.0001, "step": 9940 }, { "epoch": 7.553615486809641, "grad_norm": 183.14315795898438, "learning_rate": 5.813178851981112e-06, "loss": 0.1222, "step": 9950 }, { "epoch": 7.561207060163219, "grad_norm": 0.8877391219139099, "learning_rate": 5.804460094344642e-06, "loss": 0.0002, "step": 9960 }, { "epoch": 7.5687986335167965, "grad_norm": 0.006915534846484661, "learning_rate": 5.795738824882596e-06, "loss": 0.0001, "step": 9970 }, { "epoch": 7.576390206870374, "grad_norm": 9.879432678222656, "learning_rate": 5.787015070826044e-06, "loss": 0.0076, "step": 9980 }, { "epoch": 7.583981780223952, "grad_norm": 0.004392684902995825, "learning_rate": 5.77828885941381e-06, "loss": 0.0001, "step": 9990 }, { "epoch": 7.5915733535775285, "grad_norm": 0.000951431633438915, "learning_rate": 5.769560217892395e-06, "loss": 0.0002, "step": 10000 }, { "epoch": 7.599164926931106, "grad_norm": 0.0021181986667215824, "learning_rate": 5.760829173515883e-06, "loss": 0.0002, "step": 10010 }, { "epoch": 7.606756500284684, "grad_norm": 0.006260419264435768, "learning_rate": 5.752095753545864e-06, "loss": 0.0577, "step": 10020 }, { "epoch": 7.614348073638261, "grad_norm": 0.0006751982145942748, "learning_rate": 5.743359985251348e-06, "loss": 0.0292, "step": 10030 }, { "epoch": 7.621939646991839, "grad_norm": 0.00024200859479606152, "learning_rate": 5.734621895908668e-06, "loss": 0.0238, "step": 10040 }, { "epoch": 7.629531220345417, "grad_norm": 0.002035447396337986, "learning_rate": 5.725881512801413e-06, "loss": 0.0002, "step": 10050 }, { "epoch": 7.637122793698994, "grad_norm": 0.0007019038312137127, "learning_rate": 5.717138863220333e-06, "loss": 0.0982, "step": 10060 }, { "epoch": 7.644714367052572, "grad_norm": 0.0009322810219600797, "learning_rate": 5.7083939744632514e-06, "loss": 0.0001, "step": 10070 }, { "epoch": 7.6523059404061495, "grad_norm": 0.011389588937163353, "learning_rate": 5.699646873834983e-06, "loss": 0.0691, "step": 10080 }, { "epoch": 7.659897513759727, "grad_norm": 0.01710079051554203, "learning_rate": 5.690897588647253e-06, "loss": 0.0005, "step": 10090 }, { "epoch": 7.667489087113304, "grad_norm": 9.926609992980957, "learning_rate": 5.6821461462186045e-06, "loss": 0.0352, "step": 10100 }, { "epoch": 7.6750806604668815, "grad_norm": 0.00021108197688590735, "learning_rate": 5.673392573874316e-06, "loss": 0.0005, "step": 10110 }, { "epoch": 7.682672233820459, "grad_norm": 0.001629292848519981, "learning_rate": 5.6646368989463185e-06, "loss": 0.0479, "step": 10120 }, { "epoch": 7.690263807174037, "grad_norm": 0.12789593636989594, "learning_rate": 5.655879148773107e-06, "loss": 0.0183, "step": 10130 }, { "epoch": 7.697855380527614, "grad_norm": 0.001387747353874147, "learning_rate": 5.647119350699655e-06, "loss": 0.0116, "step": 10140 }, { "epoch": 7.705446953881192, "grad_norm": 0.0015600691549479961, "learning_rate": 5.638357532077331e-06, "loss": 0.0316, "step": 10150 }, { "epoch": 7.71303852723477, "grad_norm": 0.0008326400420628488, "learning_rate": 5.629593720263816e-06, "loss": 0.0002, "step": 10160 }, { "epoch": 7.720630100588347, "grad_norm": 0.023590516299009323, "learning_rate": 5.620827942623008e-06, "loss": 0.0008, "step": 10170 }, { "epoch": 7.728221673941924, "grad_norm": 0.000754083099309355, "learning_rate": 5.612060226524948e-06, "loss": 0.0365, "step": 10180 }, { "epoch": 7.735813247295502, "grad_norm": 0.011727853678166866, "learning_rate": 5.603290599345726e-06, "loss": 0.0438, "step": 10190 }, { "epoch": 7.743404820649079, "grad_norm": 0.20062032341957092, "learning_rate": 5.5945190884674065e-06, "loss": 0.0056, "step": 10200 }, { "epoch": 7.750996394002657, "grad_norm": 0.30250805616378784, "learning_rate": 5.585745721277923e-06, "loss": 0.1501, "step": 10210 }, { "epoch": 7.758587967356235, "grad_norm": 0.00017410292639397085, "learning_rate": 5.5769705251710175e-06, "loss": 0.0002, "step": 10220 }, { "epoch": 7.766179540709812, "grad_norm": 0.011902794241905212, "learning_rate": 5.568193527546135e-06, "loss": 0.0001, "step": 10230 }, { "epoch": 7.77377111406339, "grad_norm": 0.3667079508304596, "learning_rate": 5.559414755808348e-06, "loss": 0.0394, "step": 10240 }, { "epoch": 7.7813626874169675, "grad_norm": 0.001953916857019067, "learning_rate": 5.550634237368269e-06, "loss": 0.0006, "step": 10250 }, { "epoch": 7.788954260770545, "grad_norm": 0.0013212488265708089, "learning_rate": 5.541851999641964e-06, "loss": 0.0004, "step": 10260 }, { "epoch": 7.796545834124123, "grad_norm": 0.00039594716508872807, "learning_rate": 5.533068070050867e-06, "loss": 0.0322, "step": 10270 }, { "epoch": 7.8041374074776995, "grad_norm": 0.000754969718400389, "learning_rate": 5.524282476021692e-06, "loss": 0.1497, "step": 10280 }, { "epoch": 7.811728980831277, "grad_norm": 0.035513028502464294, "learning_rate": 5.515495244986356e-06, "loss": 0.0081, "step": 10290 }, { "epoch": 7.819320554184855, "grad_norm": 0.0016785170882940292, "learning_rate": 5.5067064043818815e-06, "loss": 0.0001, "step": 10300 }, { "epoch": 7.826912127538432, "grad_norm": 8.234527194872499e-05, "learning_rate": 5.49791598165032e-06, "loss": 0.0001, "step": 10310 }, { "epoch": 7.83450370089201, "grad_norm": 0.0006789985345676541, "learning_rate": 5.489124004238662e-06, "loss": 0.0393, "step": 10320 }, { "epoch": 7.842095274245588, "grad_norm": 0.0023299374151974916, "learning_rate": 5.480330499598754e-06, "loss": 0.0046, "step": 10330 }, { "epoch": 7.849686847599165, "grad_norm": 0.007388091180473566, "learning_rate": 5.471535495187207e-06, "loss": 0.0001, "step": 10340 }, { "epoch": 7.857278420952742, "grad_norm": 0.00018302824173588306, "learning_rate": 5.462739018465318e-06, "loss": 0.1768, "step": 10350 }, { "epoch": 7.86486999430632, "grad_norm": 0.00418035127222538, "learning_rate": 5.45394109689898e-06, "loss": 0.0165, "step": 10360 }, { "epoch": 7.872461567659897, "grad_norm": 0.01187161449342966, "learning_rate": 5.445141757958599e-06, "loss": 0.0004, "step": 10370 }, { "epoch": 7.880053141013475, "grad_norm": 0.08545250445604324, "learning_rate": 5.436341029119004e-06, "loss": 0.06, "step": 10380 }, { "epoch": 7.8876447143670525, "grad_norm": 0.004683859180659056, "learning_rate": 5.427538937859368e-06, "loss": 0.2187, "step": 10390 }, { "epoch": 7.89523628772063, "grad_norm": 0.0011295732110738754, "learning_rate": 5.418735511663112e-06, "loss": 0.0002, "step": 10400 }, { "epoch": 7.902827861074208, "grad_norm": 0.0021211670245975256, "learning_rate": 5.409930778017828e-06, "loss": 0.0425, "step": 10410 }, { "epoch": 7.910419434427785, "grad_norm": 0.0004632298951037228, "learning_rate": 5.401124764415192e-06, "loss": 0.0001, "step": 10420 }, { "epoch": 7.918011007781363, "grad_norm": 0.03465382754802704, "learning_rate": 5.392317498350876e-06, "loss": 0.07, "step": 10430 }, { "epoch": 7.925602581134941, "grad_norm": 0.0012545166537165642, "learning_rate": 5.38350900732446e-06, "loss": 0.0003, "step": 10440 }, { "epoch": 7.933194154488517, "grad_norm": 0.0008013694896362722, "learning_rate": 5.374699318839352e-06, "loss": 0.0001, "step": 10450 }, { "epoch": 7.940785727842095, "grad_norm": 0.01796998642385006, "learning_rate": 5.365888460402695e-06, "loss": 0.0001, "step": 10460 }, { "epoch": 7.948377301195673, "grad_norm": 0.06785059720277786, "learning_rate": 5.357076459525291e-06, "loss": 0.0002, "step": 10470 }, { "epoch": 7.95596887454925, "grad_norm": 0.001381418784148991, "learning_rate": 5.348263343721503e-06, "loss": 0.0001, "step": 10480 }, { "epoch": 7.963560447902828, "grad_norm": 0.06072179973125458, "learning_rate": 5.339449140509179e-06, "loss": 0.0002, "step": 10490 }, { "epoch": 7.9711520212564055, "grad_norm": 0.024496397003531456, "learning_rate": 5.330633877409561e-06, "loss": 0.1215, "step": 10500 }, { "epoch": 7.978743594609983, "grad_norm": 0.0315159372985363, "learning_rate": 5.3218175819472e-06, "loss": 0.0001, "step": 10510 }, { "epoch": 7.986335167963561, "grad_norm": 0.0015373720088973641, "learning_rate": 5.313000281649872e-06, "loss": 0.0003, "step": 10520 }, { "epoch": 7.9939267413171375, "grad_norm": 0.12398699671030045, "learning_rate": 5.304182004048488e-06, "loss": 0.0002, "step": 10530 }, { "epoch": 8.0, "eval_f1": 0.9590113159486987, "eval_loss": 0.15769141912460327, "eval_precision": 0.9585736334342291, "eval_recall": 0.9590288315629742, "eval_runtime": 75.7332, "eval_samples_per_second": 17.403, "eval_steps_per_second": 17.403, "step": 10538 } ], "logging_steps": 10, "max_steps": 19755, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.701261509159456e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }