roberta-large-1280-hazard / trainer_state.json
Quintu's picture
Upload 10 files
0d1df05 verified
{
"best_metric": 0.9590113159486987,
"best_model_checkpoint": "output_classification_1280/hazard/checkpoint-10538",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 10538,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007591573353577529,
"grad_norm": 27.469635009765625,
"learning_rate": 5.0607287449392715e-08,
"loss": 1.6903,
"step": 10
},
{
"epoch": 0.015183146707155058,
"grad_norm": 29.22759437561035,
"learning_rate": 9.109311740890688e-08,
"loss": 1.6631,
"step": 20
},
{
"epoch": 0.022774720060732587,
"grad_norm": 22.48965835571289,
"learning_rate": 1.417004048582996e-07,
"loss": 1.7504,
"step": 30
},
{
"epoch": 0.030366293414310117,
"grad_norm": 30.96166229248047,
"learning_rate": 1.9230769230769234e-07,
"loss": 1.7496,
"step": 40
},
{
"epoch": 0.03795786676788764,
"grad_norm": 28.63855743408203,
"learning_rate": 2.4291497975708504e-07,
"loss": 1.6787,
"step": 50
},
{
"epoch": 0.045549440121465175,
"grad_norm": 31.33084487915039,
"learning_rate": 2.9352226720647774e-07,
"loss": 1.7747,
"step": 60
},
{
"epoch": 0.0531410134750427,
"grad_norm": 27.18292236328125,
"learning_rate": 3.390688259109312e-07,
"loss": 1.6577,
"step": 70
},
{
"epoch": 0.06073258682862023,
"grad_norm": 30.794124603271484,
"learning_rate": 3.896761133603239e-07,
"loss": 1.7097,
"step": 80
},
{
"epoch": 0.06832416018219777,
"grad_norm": 42.49530792236328,
"learning_rate": 4.402834008097166e-07,
"loss": 1.5986,
"step": 90
},
{
"epoch": 0.07591573353577528,
"grad_norm": 26.470556259155273,
"learning_rate": 4.908906882591093e-07,
"loss": 1.7342,
"step": 100
},
{
"epoch": 0.08350730688935282,
"grad_norm": 40.713924407958984,
"learning_rate": 5.414979757085021e-07,
"loss": 1.517,
"step": 110
},
{
"epoch": 0.09109888024293035,
"grad_norm": 53.97127914428711,
"learning_rate": 5.921052631578947e-07,
"loss": 1.3995,
"step": 120
},
{
"epoch": 0.09869045359650788,
"grad_norm": 45.6757698059082,
"learning_rate": 6.427125506072875e-07,
"loss": 1.2737,
"step": 130
},
{
"epoch": 0.1062820269500854,
"grad_norm": 35.03736114501953,
"learning_rate": 6.933198380566802e-07,
"loss": 1.3719,
"step": 140
},
{
"epoch": 0.11387360030366293,
"grad_norm": 26.410057067871094,
"learning_rate": 7.388663967611337e-07,
"loss": 1.1505,
"step": 150
},
{
"epoch": 0.12146517365724047,
"grad_norm": 30.611797332763672,
"learning_rate": 7.844129554655872e-07,
"loss": 1.3579,
"step": 160
},
{
"epoch": 0.12905674701081798,
"grad_norm": 36.64908981323242,
"learning_rate": 8.350202429149798e-07,
"loss": 1.2164,
"step": 170
},
{
"epoch": 0.13664832036439553,
"grad_norm": 47.913612365722656,
"learning_rate": 8.805668016194332e-07,
"loss": 1.1154,
"step": 180
},
{
"epoch": 0.14423989371797305,
"grad_norm": 71.07138061523438,
"learning_rate": 9.31174089068826e-07,
"loss": 1.2263,
"step": 190
},
{
"epoch": 0.15183146707155057,
"grad_norm": 46.60552978515625,
"learning_rate": 9.817813765182186e-07,
"loss": 1.3512,
"step": 200
},
{
"epoch": 0.15942304042512812,
"grad_norm": 39.1867561340332,
"learning_rate": 1.0323886639676114e-06,
"loss": 1.095,
"step": 210
},
{
"epoch": 0.16701461377870563,
"grad_norm": 48.71131896972656,
"learning_rate": 1.0829959514170041e-06,
"loss": 1.401,
"step": 220
},
{
"epoch": 0.17460618713228315,
"grad_norm": 46.5413703918457,
"learning_rate": 1.133603238866397e-06,
"loss": 1.4863,
"step": 230
},
{
"epoch": 0.1821977604858607,
"grad_norm": 45.00301742553711,
"learning_rate": 1.1842105263157894e-06,
"loss": 1.0144,
"step": 240
},
{
"epoch": 0.18978933383943822,
"grad_norm": 66.17977905273438,
"learning_rate": 1.2348178137651822e-06,
"loss": 1.2741,
"step": 250
},
{
"epoch": 0.19738090719301576,
"grad_norm": 71.00930786132812,
"learning_rate": 1.285425101214575e-06,
"loss": 1.3536,
"step": 260
},
{
"epoch": 0.20497248054659328,
"grad_norm": 66.67515563964844,
"learning_rate": 1.336032388663968e-06,
"loss": 1.247,
"step": 270
},
{
"epoch": 0.2125640539001708,
"grad_norm": 47.43987274169922,
"learning_rate": 1.3866396761133605e-06,
"loss": 1.2843,
"step": 280
},
{
"epoch": 0.22015562725374835,
"grad_norm": 41.783695220947266,
"learning_rate": 1.4372469635627532e-06,
"loss": 1.401,
"step": 290
},
{
"epoch": 0.22774720060732587,
"grad_norm": 61.59716796875,
"learning_rate": 1.4878542510121458e-06,
"loss": 1.134,
"step": 300
},
{
"epoch": 0.23533877396090339,
"grad_norm": 52.884761810302734,
"learning_rate": 1.5384615384615387e-06,
"loss": 1.2276,
"step": 310
},
{
"epoch": 0.24293034731448093,
"grad_norm": 43.76587677001953,
"learning_rate": 1.5890688259109313e-06,
"loss": 1.1747,
"step": 320
},
{
"epoch": 0.25052192066805845,
"grad_norm": 24.984729766845703,
"learning_rate": 1.639676113360324e-06,
"loss": 1.2557,
"step": 330
},
{
"epoch": 0.25811349402163597,
"grad_norm": 32.03645324707031,
"learning_rate": 1.6902834008097168e-06,
"loss": 1.0106,
"step": 340
},
{
"epoch": 0.2657050673752135,
"grad_norm": 37.198177337646484,
"learning_rate": 1.7408906882591095e-06,
"loss": 1.0211,
"step": 350
},
{
"epoch": 0.27329664072879106,
"grad_norm": 46.07294464111328,
"learning_rate": 1.791497975708502e-06,
"loss": 1.019,
"step": 360
},
{
"epoch": 0.2808882140823686,
"grad_norm": 61.57015609741211,
"learning_rate": 1.8370445344129556e-06,
"loss": 1.3087,
"step": 370
},
{
"epoch": 0.2884797874359461,
"grad_norm": 37.366268157958984,
"learning_rate": 1.8876518218623483e-06,
"loss": 0.9793,
"step": 380
},
{
"epoch": 0.2960713607895236,
"grad_norm": 25.108686447143555,
"learning_rate": 1.938259109311741e-06,
"loss": 1.2809,
"step": 390
},
{
"epoch": 0.30366293414310114,
"grad_norm": 41.831172943115234,
"learning_rate": 1.988866396761134e-06,
"loss": 1.04,
"step": 400
},
{
"epoch": 0.3112545074966787,
"grad_norm": 53.10079574584961,
"learning_rate": 2.0394736842105266e-06,
"loss": 1.1906,
"step": 410
},
{
"epoch": 0.31884608085025623,
"grad_norm": 38.19053649902344,
"learning_rate": 2.090080971659919e-06,
"loss": 1.1724,
"step": 420
},
{
"epoch": 0.32643765420383375,
"grad_norm": 43.842498779296875,
"learning_rate": 2.140688259109312e-06,
"loss": 1.1657,
"step": 430
},
{
"epoch": 0.33402922755741127,
"grad_norm": 54.60807418823242,
"learning_rate": 2.1912955465587044e-06,
"loss": 0.9103,
"step": 440
},
{
"epoch": 0.3416208009109888,
"grad_norm": 48.880218505859375,
"learning_rate": 2.241902834008097e-06,
"loss": 1.1537,
"step": 450
},
{
"epoch": 0.3492123742645663,
"grad_norm": 40.26908493041992,
"learning_rate": 2.2925101214574904e-06,
"loss": 1.0753,
"step": 460
},
{
"epoch": 0.3568039476181439,
"grad_norm": 65.76298522949219,
"learning_rate": 2.3431174089068827e-06,
"loss": 0.9305,
"step": 470
},
{
"epoch": 0.3643955209717214,
"grad_norm": 33.159881591796875,
"learning_rate": 2.3937246963562755e-06,
"loss": 0.9382,
"step": 480
},
{
"epoch": 0.3719870943252989,
"grad_norm": 32.02263259887695,
"learning_rate": 2.4443319838056682e-06,
"loss": 0.8098,
"step": 490
},
{
"epoch": 0.37957866767887644,
"grad_norm": 50.231842041015625,
"learning_rate": 2.494939271255061e-06,
"loss": 1.0712,
"step": 500
},
{
"epoch": 0.38717024103245395,
"grad_norm": 54.17763137817383,
"learning_rate": 2.5455465587044537e-06,
"loss": 0.9814,
"step": 510
},
{
"epoch": 0.39476181438603153,
"grad_norm": 25.580745697021484,
"learning_rate": 2.5961538461538465e-06,
"loss": 0.5809,
"step": 520
},
{
"epoch": 0.40235338773960905,
"grad_norm": 5.449360370635986,
"learning_rate": 2.646761133603239e-06,
"loss": 0.5567,
"step": 530
},
{
"epoch": 0.40994496109318657,
"grad_norm": 29.534494400024414,
"learning_rate": 2.697368421052632e-06,
"loss": 0.5823,
"step": 540
},
{
"epoch": 0.4175365344467641,
"grad_norm": 13.788243293762207,
"learning_rate": 2.7479757085020247e-06,
"loss": 0.9266,
"step": 550
},
{
"epoch": 0.4251281078003416,
"grad_norm": 32.08829879760742,
"learning_rate": 2.798582995951417e-06,
"loss": 0.432,
"step": 560
},
{
"epoch": 0.4327196811539191,
"grad_norm": 12.410125732421875,
"learning_rate": 2.8491902834008103e-06,
"loss": 0.7482,
"step": 570
},
{
"epoch": 0.4403112545074967,
"grad_norm": 24.522109985351562,
"learning_rate": 2.8997975708502026e-06,
"loss": 0.504,
"step": 580
},
{
"epoch": 0.4479028278610742,
"grad_norm": 17.949840545654297,
"learning_rate": 2.9504048582995953e-06,
"loss": 0.6836,
"step": 590
},
{
"epoch": 0.45549440121465173,
"grad_norm": 13.618581771850586,
"learning_rate": 3.001012145748988e-06,
"loss": 0.6873,
"step": 600
},
{
"epoch": 0.46308597456822925,
"grad_norm": 50.62519454956055,
"learning_rate": 3.0516194331983804e-06,
"loss": 0.4781,
"step": 610
},
{
"epoch": 0.47067754792180677,
"grad_norm": 27.868289947509766,
"learning_rate": 3.1022267206477736e-06,
"loss": 0.7148,
"step": 620
},
{
"epoch": 0.47826912127538435,
"grad_norm": 30.8429012298584,
"learning_rate": 3.1528340080971664e-06,
"loss": 0.591,
"step": 630
},
{
"epoch": 0.48586069462896186,
"grad_norm": 51.042518615722656,
"learning_rate": 3.2034412955465587e-06,
"loss": 0.5481,
"step": 640
},
{
"epoch": 0.4934522679825394,
"grad_norm": 42.53914260864258,
"learning_rate": 3.254048582995952e-06,
"loss": 0.6404,
"step": 650
},
{
"epoch": 0.5010438413361169,
"grad_norm": 28.016672134399414,
"learning_rate": 3.3046558704453446e-06,
"loss": 1.075,
"step": 660
},
{
"epoch": 0.5086354146896944,
"grad_norm": 26.764345169067383,
"learning_rate": 3.355263157894737e-06,
"loss": 0.5689,
"step": 670
},
{
"epoch": 0.5162269880432719,
"grad_norm": 10.721156120300293,
"learning_rate": 3.40587044534413e-06,
"loss": 0.302,
"step": 680
},
{
"epoch": 0.5238185613968495,
"grad_norm": 33.98798751831055,
"learning_rate": 3.4564777327935225e-06,
"loss": 0.3699,
"step": 690
},
{
"epoch": 0.531410134750427,
"grad_norm": 98.7930908203125,
"learning_rate": 3.5070850202429152e-06,
"loss": 0.5585,
"step": 700
},
{
"epoch": 0.5390017081040046,
"grad_norm": 17.008193969726562,
"learning_rate": 3.557692307692308e-06,
"loss": 0.5513,
"step": 710
},
{
"epoch": 0.5465932814575821,
"grad_norm": 0.9657185077667236,
"learning_rate": 3.6082995951417003e-06,
"loss": 0.3778,
"step": 720
},
{
"epoch": 0.5541848548111596,
"grad_norm": 22.920196533203125,
"learning_rate": 3.6589068825910935e-06,
"loss": 0.2108,
"step": 730
},
{
"epoch": 0.5617764281647372,
"grad_norm": 24.24422264099121,
"learning_rate": 3.7095141700404862e-06,
"loss": 0.774,
"step": 740
},
{
"epoch": 0.5693680015183147,
"grad_norm": 10.006725311279297,
"learning_rate": 3.7601214574898786e-06,
"loss": 0.3806,
"step": 750
},
{
"epoch": 0.5769595748718922,
"grad_norm": 25.408447265625,
"learning_rate": 3.8107287449392717e-06,
"loss": 0.3539,
"step": 760
},
{
"epoch": 0.5845511482254697,
"grad_norm": 1.4603581428527832,
"learning_rate": 3.8613360323886645e-06,
"loss": 0.2608,
"step": 770
},
{
"epoch": 0.5921427215790472,
"grad_norm": 16.798980712890625,
"learning_rate": 3.911943319838057e-06,
"loss": 0.3287,
"step": 780
},
{
"epoch": 0.5997342949326248,
"grad_norm": 11.706854820251465,
"learning_rate": 3.96255060728745e-06,
"loss": 0.5302,
"step": 790
},
{
"epoch": 0.6073258682862023,
"grad_norm": 20.42545509338379,
"learning_rate": 4.013157894736842e-06,
"loss": 0.489,
"step": 800
},
{
"epoch": 0.6149174416397798,
"grad_norm": 44.284629821777344,
"learning_rate": 4.063765182186235e-06,
"loss": 0.4183,
"step": 810
},
{
"epoch": 0.6225090149933574,
"grad_norm": 35.91806411743164,
"learning_rate": 4.114372469635628e-06,
"loss": 0.6323,
"step": 820
},
{
"epoch": 0.6301005883469349,
"grad_norm": 5.10564661026001,
"learning_rate": 4.16497975708502e-06,
"loss": 0.2137,
"step": 830
},
{
"epoch": 0.6376921617005125,
"grad_norm": 14.327881813049316,
"learning_rate": 4.215587044534413e-06,
"loss": 0.1283,
"step": 840
},
{
"epoch": 0.64528373505409,
"grad_norm": 0.4119018018245697,
"learning_rate": 4.2661943319838065e-06,
"loss": 0.5361,
"step": 850
},
{
"epoch": 0.6528753084076675,
"grad_norm": 0.33248305320739746,
"learning_rate": 4.316801619433199e-06,
"loss": 0.3669,
"step": 860
},
{
"epoch": 0.660466881761245,
"grad_norm": 0.44110462069511414,
"learning_rate": 4.367408906882591e-06,
"loss": 0.2979,
"step": 870
},
{
"epoch": 0.6680584551148225,
"grad_norm": 0.34030860662460327,
"learning_rate": 4.418016194331984e-06,
"loss": 0.2611,
"step": 880
},
{
"epoch": 0.6756500284684,
"grad_norm": 46.19267272949219,
"learning_rate": 4.468623481781377e-06,
"loss": 0.2948,
"step": 890
},
{
"epoch": 0.6832416018219776,
"grad_norm": 33.486717224121094,
"learning_rate": 4.51923076923077e-06,
"loss": 0.2593,
"step": 900
},
{
"epoch": 0.6908331751755551,
"grad_norm": 43.04954528808594,
"learning_rate": 4.569838056680162e-06,
"loss": 0.2791,
"step": 910
},
{
"epoch": 0.6984247485291326,
"grad_norm": 57.075809478759766,
"learning_rate": 4.6204453441295545e-06,
"loss": 0.2198,
"step": 920
},
{
"epoch": 0.7060163218827102,
"grad_norm": 52.269168853759766,
"learning_rate": 4.671052631578948e-06,
"loss": 0.4377,
"step": 930
},
{
"epoch": 0.7136078952362878,
"grad_norm": 0.06886545568704605,
"learning_rate": 4.72165991902834e-06,
"loss": 0.1961,
"step": 940
},
{
"epoch": 0.7211994685898653,
"grad_norm": 41.10899353027344,
"learning_rate": 4.772267206477733e-06,
"loss": 0.4847,
"step": 950
},
{
"epoch": 0.7287910419434428,
"grad_norm": 2.2750415802001953,
"learning_rate": 4.822874493927126e-06,
"loss": 0.2632,
"step": 960
},
{
"epoch": 0.7363826152970203,
"grad_norm": 0.374896764755249,
"learning_rate": 4.873481781376519e-06,
"loss": 0.2243,
"step": 970
},
{
"epoch": 0.7439741886505978,
"grad_norm": 29.88395118713379,
"learning_rate": 4.924089068825911e-06,
"loss": 0.256,
"step": 980
},
{
"epoch": 0.7515657620041754,
"grad_norm": 48.7998161315918,
"learning_rate": 4.974696356275304e-06,
"loss": 0.2969,
"step": 990
},
{
"epoch": 0.7591573353577529,
"grad_norm": 33.62395095825195,
"learning_rate": 5.025303643724697e-06,
"loss": 0.4137,
"step": 1000
},
{
"epoch": 0.7667489087113304,
"grad_norm": 37.788795471191406,
"learning_rate": 5.07591093117409e-06,
"loss": 0.2332,
"step": 1010
},
{
"epoch": 0.7743404820649079,
"grad_norm": 0.14387387037277222,
"learning_rate": 5.126518218623482e-06,
"loss": 0.2919,
"step": 1020
},
{
"epoch": 0.7819320554184854,
"grad_norm": 5.84027099609375,
"learning_rate": 5.177125506072875e-06,
"loss": 0.2009,
"step": 1030
},
{
"epoch": 0.7895236287720631,
"grad_norm": 0.04207382723689079,
"learning_rate": 5.227732793522268e-06,
"loss": 0.2878,
"step": 1040
},
{
"epoch": 0.7971152021256406,
"grad_norm": 45.870460510253906,
"learning_rate": 5.27834008097166e-06,
"loss": 0.2449,
"step": 1050
},
{
"epoch": 0.8047067754792181,
"grad_norm": 51.27897262573242,
"learning_rate": 5.328947368421054e-06,
"loss": 0.2529,
"step": 1060
},
{
"epoch": 0.8122983488327956,
"grad_norm": 0.5692594051361084,
"learning_rate": 5.379554655870446e-06,
"loss": 0.6134,
"step": 1070
},
{
"epoch": 0.8198899221863731,
"grad_norm": 0.09678292274475098,
"learning_rate": 5.430161943319839e-06,
"loss": 0.2444,
"step": 1080
},
{
"epoch": 0.8274814955399507,
"grad_norm": 1.1001735925674438,
"learning_rate": 5.480769230769232e-06,
"loss": 0.2606,
"step": 1090
},
{
"epoch": 0.8350730688935282,
"grad_norm": 0.183668851852417,
"learning_rate": 5.531376518218624e-06,
"loss": 0.4214,
"step": 1100
},
{
"epoch": 0.8426646422471057,
"grad_norm": 1.1151483058929443,
"learning_rate": 5.5819838056680164e-06,
"loss": 0.1742,
"step": 1110
},
{
"epoch": 0.8502562156006832,
"grad_norm": 4.986824989318848,
"learning_rate": 5.632591093117409e-06,
"loss": 0.2275,
"step": 1120
},
{
"epoch": 0.8578477889542607,
"grad_norm": 40.5273323059082,
"learning_rate": 5.683198380566802e-06,
"loss": 0.2895,
"step": 1130
},
{
"epoch": 0.8654393623078382,
"grad_norm": 27.60036849975586,
"learning_rate": 5.733805668016194e-06,
"loss": 0.1973,
"step": 1140
},
{
"epoch": 0.8730309356614159,
"grad_norm": 0.3474140763282776,
"learning_rate": 5.784412955465587e-06,
"loss": 0.2508,
"step": 1150
},
{
"epoch": 0.8806225090149934,
"grad_norm": 41.10483169555664,
"learning_rate": 5.835020242914981e-06,
"loss": 0.1403,
"step": 1160
},
{
"epoch": 0.8882140823685709,
"grad_norm": 38.43809509277344,
"learning_rate": 5.885627530364373e-06,
"loss": 0.1639,
"step": 1170
},
{
"epoch": 0.8958056557221484,
"grad_norm": 0.1039985790848732,
"learning_rate": 5.936234817813765e-06,
"loss": 0.3821,
"step": 1180
},
{
"epoch": 0.903397229075726,
"grad_norm": 7.511643409729004,
"learning_rate": 5.9868421052631585e-06,
"loss": 0.3217,
"step": 1190
},
{
"epoch": 0.9109888024293035,
"grad_norm": 1.0779646635055542,
"learning_rate": 6.037449392712551e-06,
"loss": 0.306,
"step": 1200
},
{
"epoch": 0.918580375782881,
"grad_norm": 36.478607177734375,
"learning_rate": 6.088056680161943e-06,
"loss": 0.2311,
"step": 1210
},
{
"epoch": 0.9261719491364585,
"grad_norm": 5.484299659729004,
"learning_rate": 6.138663967611337e-06,
"loss": 0.1957,
"step": 1220
},
{
"epoch": 0.933763522490036,
"grad_norm": 36.05448532104492,
"learning_rate": 6.1892712550607295e-06,
"loss": 0.1806,
"step": 1230
},
{
"epoch": 0.9413550958436135,
"grad_norm": 0.11734521389007568,
"learning_rate": 6.239878542510122e-06,
"loss": 0.1755,
"step": 1240
},
{
"epoch": 0.9489466691971912,
"grad_norm": 29.589279174804688,
"learning_rate": 6.290485829959515e-06,
"loss": 0.1282,
"step": 1250
},
{
"epoch": 0.9565382425507687,
"grad_norm": 0.0918528214097023,
"learning_rate": 6.341093117408907e-06,
"loss": 0.1178,
"step": 1260
},
{
"epoch": 0.9641298159043462,
"grad_norm": 23.708993911743164,
"learning_rate": 6.3917004048583e-06,
"loss": 0.4097,
"step": 1270
},
{
"epoch": 0.9717213892579237,
"grad_norm": 34.241607666015625,
"learning_rate": 6.442307692307693e-06,
"loss": 0.25,
"step": 1280
},
{
"epoch": 0.9793129626115012,
"grad_norm": 3.067420482635498,
"learning_rate": 6.492914979757086e-06,
"loss": 0.201,
"step": 1290
},
{
"epoch": 0.9869045359650788,
"grad_norm": 20.88066291809082,
"learning_rate": 6.543522267206478e-06,
"loss": 0.2371,
"step": 1300
},
{
"epoch": 0.9944961093186563,
"grad_norm": 463.48541259765625,
"learning_rate": 6.5941295546558715e-06,
"loss": 0.436,
"step": 1310
},
{
"epoch": 0.9998102106661606,
"eval_f1": 0.8680898011188399,
"eval_loss": 0.3059525787830353,
"eval_precision": 0.8342809981821465,
"eval_recall": 0.8694992412746586,
"eval_runtime": 75.6444,
"eval_samples_per_second": 17.424,
"eval_steps_per_second": 17.424,
"step": 1317
},
{
"epoch": 1.0020876826722338,
"grad_norm": 49.61616897583008,
"learning_rate": 6.644736842105264e-06,
"loss": 0.0671,
"step": 1320
},
{
"epoch": 1.0096792560258114,
"grad_norm": 0.10375616699457169,
"learning_rate": 6.695344129554656e-06,
"loss": 0.3414,
"step": 1330
},
{
"epoch": 1.0172708293793888,
"grad_norm": 1.4322081804275513,
"learning_rate": 6.745951417004049e-06,
"loss": 0.4417,
"step": 1340
},
{
"epoch": 1.0248624027329665,
"grad_norm": 33.353389739990234,
"learning_rate": 6.796558704453442e-06,
"loss": 0.2982,
"step": 1350
},
{
"epoch": 1.0324539760865439,
"grad_norm": 0.03437357768416405,
"learning_rate": 6.847165991902834e-06,
"loss": 0.1854,
"step": 1360
},
{
"epoch": 1.0400455494401215,
"grad_norm": 25.882795333862305,
"learning_rate": 6.897773279352226e-06,
"loss": 0.2332,
"step": 1370
},
{
"epoch": 1.047637122793699,
"grad_norm": 0.06532780081033707,
"learning_rate": 6.94838056680162e-06,
"loss": 0.0215,
"step": 1380
},
{
"epoch": 1.0552286961472765,
"grad_norm": 32.39247131347656,
"learning_rate": 6.998987854251013e-06,
"loss": 0.072,
"step": 1390
},
{
"epoch": 1.062820269500854,
"grad_norm": 0.027906494215130806,
"learning_rate": 7.049595141700405e-06,
"loss": 0.2047,
"step": 1400
},
{
"epoch": 1.0704118428544316,
"grad_norm": 0.6285625100135803,
"learning_rate": 7.100202429149798e-06,
"loss": 0.1842,
"step": 1410
},
{
"epoch": 1.0780034162080092,
"grad_norm": 45.90504837036133,
"learning_rate": 7.1508097165991906e-06,
"loss": 0.3873,
"step": 1420
},
{
"epoch": 1.0855949895615866,
"grad_norm": 0.1192048192024231,
"learning_rate": 7.201417004048583e-06,
"loss": 0.0922,
"step": 1430
},
{
"epoch": 1.0931865629151643,
"grad_norm": 0.01541421003639698,
"learning_rate": 7.252024291497977e-06,
"loss": 0.2405,
"step": 1440
},
{
"epoch": 1.1007781362687417,
"grad_norm": 1.1560391187667847,
"learning_rate": 7.302631578947369e-06,
"loss": 0.127,
"step": 1450
},
{
"epoch": 1.1083697096223193,
"grad_norm": 0.03278697654604912,
"learning_rate": 7.353238866396762e-06,
"loss": 0.1782,
"step": 1460
},
{
"epoch": 1.1159612829758967,
"grad_norm": 0.019922947511076927,
"learning_rate": 7.403846153846155e-06,
"loss": 0.0464,
"step": 1470
},
{
"epoch": 1.1235528563294743,
"grad_norm": 0.06464574486017227,
"learning_rate": 7.454453441295547e-06,
"loss": 0.1965,
"step": 1480
},
{
"epoch": 1.1311444296830517,
"grad_norm": 3.4782345294952393,
"learning_rate": 7.505060728744939e-06,
"loss": 0.2716,
"step": 1490
},
{
"epoch": 1.1387360030366294,
"grad_norm": 11.709443092346191,
"learning_rate": 7.555668016194333e-06,
"loss": 0.0861,
"step": 1500
},
{
"epoch": 1.146327576390207,
"grad_norm": 0.06341992318630219,
"learning_rate": 7.606275303643725e-06,
"loss": 0.2737,
"step": 1510
},
{
"epoch": 1.1539191497437844,
"grad_norm": 0.41259318590164185,
"learning_rate": 7.656882591093118e-06,
"loss": 0.0553,
"step": 1520
},
{
"epoch": 1.1615107230973618,
"grad_norm": 0.2754802107810974,
"learning_rate": 7.70748987854251e-06,
"loss": 0.1928,
"step": 1530
},
{
"epoch": 1.1691022964509394,
"grad_norm": 35.12890625,
"learning_rate": 7.758097165991903e-06,
"loss": 0.2975,
"step": 1540
},
{
"epoch": 1.176693869804517,
"grad_norm": 0.01568063162267208,
"learning_rate": 7.808704453441295e-06,
"loss": 0.0757,
"step": 1550
},
{
"epoch": 1.1842854431580945,
"grad_norm": 63.980228424072266,
"learning_rate": 7.859311740890689e-06,
"loss": 0.2807,
"step": 1560
},
{
"epoch": 1.191877016511672,
"grad_norm": 0.28402331471443176,
"learning_rate": 7.909919028340081e-06,
"loss": 0.0808,
"step": 1570
},
{
"epoch": 1.1994685898652495,
"grad_norm": 0.028258422389626503,
"learning_rate": 7.960526315789474e-06,
"loss": 0.1397,
"step": 1580
},
{
"epoch": 1.2070601632188271,
"grad_norm": 3.0772080421447754,
"learning_rate": 8.011133603238868e-06,
"loss": 0.0761,
"step": 1590
},
{
"epoch": 1.2146517365724046,
"grad_norm": 0.37224826216697693,
"learning_rate": 8.06174089068826e-06,
"loss": 0.2265,
"step": 1600
},
{
"epoch": 1.2222433099259822,
"grad_norm": 0.02686912938952446,
"learning_rate": 8.112348178137652e-06,
"loss": 0.0467,
"step": 1610
},
{
"epoch": 1.2298348832795596,
"grad_norm": 0.040963444858789444,
"learning_rate": 8.162955465587045e-06,
"loss": 0.3815,
"step": 1620
},
{
"epoch": 1.2374264566331372,
"grad_norm": 15.119370460510254,
"learning_rate": 8.213562753036439e-06,
"loss": 0.1005,
"step": 1630
},
{
"epoch": 1.2450180299867148,
"grad_norm": 35.875064849853516,
"learning_rate": 8.264170040485831e-06,
"loss": 0.3051,
"step": 1640
},
{
"epoch": 1.2526096033402923,
"grad_norm": 70.46387481689453,
"learning_rate": 8.314777327935223e-06,
"loss": 0.175,
"step": 1650
},
{
"epoch": 1.2602011766938699,
"grad_norm": 0.02256329357624054,
"learning_rate": 8.365384615384616e-06,
"loss": 0.2415,
"step": 1660
},
{
"epoch": 1.2677927500474473,
"grad_norm": 35.11568069458008,
"learning_rate": 8.415991902834008e-06,
"loss": 0.2629,
"step": 1670
},
{
"epoch": 1.275384323401025,
"grad_norm": 71.48613739013672,
"learning_rate": 8.4665991902834e-06,
"loss": 0.4151,
"step": 1680
},
{
"epoch": 1.2829758967546023,
"grad_norm": 78.90449523925781,
"learning_rate": 8.517206477732795e-06,
"loss": 0.1292,
"step": 1690
},
{
"epoch": 1.29056747010818,
"grad_norm": 31.373775482177734,
"learning_rate": 8.567813765182187e-06,
"loss": 0.26,
"step": 1700
},
{
"epoch": 1.2981590434617574,
"grad_norm": 0.049251481890678406,
"learning_rate": 8.61842105263158e-06,
"loss": 0.4099,
"step": 1710
},
{
"epoch": 1.305750616815335,
"grad_norm": 26.275672912597656,
"learning_rate": 8.669028340080973e-06,
"loss": 0.1674,
"step": 1720
},
{
"epoch": 1.3133421901689126,
"grad_norm": 56.808570861816406,
"learning_rate": 8.719635627530366e-06,
"loss": 0.2071,
"step": 1730
},
{
"epoch": 1.32093376352249,
"grad_norm": 12.969684600830078,
"learning_rate": 8.770242914979758e-06,
"loss": 0.3515,
"step": 1740
},
{
"epoch": 1.3285253368760674,
"grad_norm": 0.2686771750450134,
"learning_rate": 8.82085020242915e-06,
"loss": 0.128,
"step": 1750
},
{
"epoch": 1.336116910229645,
"grad_norm": 0.012039333581924438,
"learning_rate": 8.871457489878543e-06,
"loss": 0.1058,
"step": 1760
},
{
"epoch": 1.3437084835832227,
"grad_norm": 20.223878860473633,
"learning_rate": 8.922064777327935e-06,
"loss": 0.196,
"step": 1770
},
{
"epoch": 1.3513000569368,
"grad_norm": 0.014049122110009193,
"learning_rate": 8.972672064777329e-06,
"loss": 0.3733,
"step": 1780
},
{
"epoch": 1.3588916302903777,
"grad_norm": 42.03798294067383,
"learning_rate": 9.023279352226721e-06,
"loss": 0.1683,
"step": 1790
},
{
"epoch": 1.3664832036439551,
"grad_norm": 0.044906727969646454,
"learning_rate": 9.073886639676114e-06,
"loss": 0.2116,
"step": 1800
},
{
"epoch": 1.3740747769975328,
"grad_norm": 33.70309829711914,
"learning_rate": 9.124493927125508e-06,
"loss": 0.3049,
"step": 1810
},
{
"epoch": 1.3816663503511104,
"grad_norm": 8.82701301574707,
"learning_rate": 9.1751012145749e-06,
"loss": 0.0822,
"step": 1820
},
{
"epoch": 1.3892579237046878,
"grad_norm": 2.3878729343414307,
"learning_rate": 9.225708502024292e-06,
"loss": 0.0592,
"step": 1830
},
{
"epoch": 1.3968494970582652,
"grad_norm": 0.0016124140238389373,
"learning_rate": 9.276315789473686e-06,
"loss": 0.1208,
"step": 1840
},
{
"epoch": 1.4044410704118429,
"grad_norm": 0.13426095247268677,
"learning_rate": 9.326923076923079e-06,
"loss": 0.2488,
"step": 1850
},
{
"epoch": 1.4120326437654205,
"grad_norm": 95.8023681640625,
"learning_rate": 9.377530364372471e-06,
"loss": 0.3505,
"step": 1860
},
{
"epoch": 1.4196242171189979,
"grad_norm": 0.10023036599159241,
"learning_rate": 9.428137651821863e-06,
"loss": 0.2593,
"step": 1870
},
{
"epoch": 1.4272157904725755,
"grad_norm": 0.0036512434016913176,
"learning_rate": 9.478744939271256e-06,
"loss": 0.1653,
"step": 1880
},
{
"epoch": 1.434807363826153,
"grad_norm": 0.11651404201984406,
"learning_rate": 9.529352226720648e-06,
"loss": 0.1281,
"step": 1890
},
{
"epoch": 1.4423989371797306,
"grad_norm": 99.45907592773438,
"learning_rate": 9.57995951417004e-06,
"loss": 0.2001,
"step": 1900
},
{
"epoch": 1.449990510533308,
"grad_norm": 0.42387983202934265,
"learning_rate": 9.630566801619434e-06,
"loss": 0.2895,
"step": 1910
},
{
"epoch": 1.4575820838868856,
"grad_norm": 155.79856872558594,
"learning_rate": 9.681174089068827e-06,
"loss": 0.2749,
"step": 1920
},
{
"epoch": 1.465173657240463,
"grad_norm": 0.036998867988586426,
"learning_rate": 9.731781376518219e-06,
"loss": 0.3386,
"step": 1930
},
{
"epoch": 1.4727652305940406,
"grad_norm": 20.147798538208008,
"learning_rate": 9.782388663967613e-06,
"loss": 0.0259,
"step": 1940
},
{
"epoch": 1.4803568039476183,
"grad_norm": 0.6697649955749512,
"learning_rate": 9.832995951417005e-06,
"loss": 0.0671,
"step": 1950
},
{
"epoch": 1.4879483773011957,
"grad_norm": 34.21855545043945,
"learning_rate": 9.883603238866398e-06,
"loss": 0.4116,
"step": 1960
},
{
"epoch": 1.495539950654773,
"grad_norm": 55.607818603515625,
"learning_rate": 9.93421052631579e-06,
"loss": 0.2809,
"step": 1970
},
{
"epoch": 1.5031315240083507,
"grad_norm": 7.255304336547852,
"learning_rate": 9.984817813765182e-06,
"loss": 0.2086,
"step": 1980
},
{
"epoch": 1.5107230973619283,
"grad_norm": 0.03336051478981972,
"learning_rate": 9.999996175090899e-06,
"loss": 0.0513,
"step": 1990
},
{
"epoch": 1.5183146707155057,
"grad_norm": 0.016688983887434006,
"learning_rate": 9.999977440856317e-06,
"loss": 0.1644,
"step": 2000
},
{
"epoch": 1.5259062440690834,
"grad_norm": 25.093719482421875,
"learning_rate": 9.999943094820354e-06,
"loss": 0.2127,
"step": 2010
},
{
"epoch": 1.5334978174226608,
"grad_norm": 28.240819931030273,
"learning_rate": 9.999893137090254e-06,
"loss": 0.2039,
"step": 2020
},
{
"epoch": 1.5410893907762384,
"grad_norm": 0.2675958275794983,
"learning_rate": 9.999827567822e-06,
"loss": 0.1192,
"step": 2030
},
{
"epoch": 1.548680964129816,
"grad_norm": 0.0035021628718823195,
"learning_rate": 9.999746387220327e-06,
"loss": 0.4307,
"step": 2040
},
{
"epoch": 1.5562725374833934,
"grad_norm": 45.449134826660156,
"learning_rate": 9.999649595538705e-06,
"loss": 0.1564,
"step": 2050
},
{
"epoch": 1.5638641108369709,
"grad_norm": 28.17760467529297,
"learning_rate": 9.999537193079362e-06,
"loss": 0.3947,
"step": 2060
},
{
"epoch": 1.5714556841905485,
"grad_norm": 0.08233608305454254,
"learning_rate": 9.999409180193255e-06,
"loss": 0.2997,
"step": 2070
},
{
"epoch": 1.5790472575441261,
"grad_norm": 0.010642267763614655,
"learning_rate": 9.99926555728009e-06,
"loss": 0.0658,
"step": 2080
},
{
"epoch": 1.5866388308977035,
"grad_norm": 33.69260787963867,
"learning_rate": 9.999106324788313e-06,
"loss": 0.2578,
"step": 2090
},
{
"epoch": 1.594230404251281,
"grad_norm": 35.530982971191406,
"learning_rate": 9.998931483215103e-06,
"loss": 0.0085,
"step": 2100
},
{
"epoch": 1.6018219776048586,
"grad_norm": 0.02198372408747673,
"learning_rate": 9.998741033106385e-06,
"loss": 0.1038,
"step": 2110
},
{
"epoch": 1.6094135509584362,
"grad_norm": 3.9551048278808594,
"learning_rate": 9.998534975056814e-06,
"loss": 0.1167,
"step": 2120
},
{
"epoch": 1.6170051243120138,
"grad_norm": 1.1452088356018066,
"learning_rate": 9.998313309709782e-06,
"loss": 0.1636,
"step": 2130
},
{
"epoch": 1.6245966976655912,
"grad_norm": 45.56749725341797,
"learning_rate": 9.998076037757408e-06,
"loss": 0.2347,
"step": 2140
},
{
"epoch": 1.6321882710191686,
"grad_norm": 0.002319494029507041,
"learning_rate": 9.997823159940545e-06,
"loss": 0.0795,
"step": 2150
},
{
"epoch": 1.6397798443727463,
"grad_norm": 0.028734903782606125,
"learning_rate": 9.997554677048776e-06,
"loss": 0.2305,
"step": 2160
},
{
"epoch": 1.647371417726324,
"grad_norm": 0.004517258144915104,
"learning_rate": 9.997270589920399e-06,
"loss": 0.0011,
"step": 2170
},
{
"epoch": 1.6549629910799013,
"grad_norm": 1.5917277336120605,
"learning_rate": 9.996970899442444e-06,
"loss": 0.1614,
"step": 2180
},
{
"epoch": 1.6625545644334787,
"grad_norm": 0.05392596498131752,
"learning_rate": 9.996655606550657e-06,
"loss": 0.2937,
"step": 2190
},
{
"epoch": 1.6701461377870563,
"grad_norm": 39.229007720947266,
"learning_rate": 9.996324712229499e-06,
"loss": 0.1227,
"step": 2200
},
{
"epoch": 1.677737711140634,
"grad_norm": 19.827287673950195,
"learning_rate": 9.995978217512146e-06,
"loss": 0.1703,
"step": 2210
},
{
"epoch": 1.6853292844942114,
"grad_norm": 0.007869013585150242,
"learning_rate": 9.995616123480485e-06,
"loss": 0.298,
"step": 2220
},
{
"epoch": 1.692920857847789,
"grad_norm": 17.308448791503906,
"learning_rate": 9.99523843126511e-06,
"loss": 0.2699,
"step": 2230
},
{
"epoch": 1.7005124312013664,
"grad_norm": 0.07290565222501755,
"learning_rate": 9.994845142045315e-06,
"loss": 0.0798,
"step": 2240
},
{
"epoch": 1.708104004554944,
"grad_norm": 0.0642884150147438,
"learning_rate": 9.994436257049098e-06,
"loss": 0.3115,
"step": 2250
},
{
"epoch": 1.7156955779085217,
"grad_norm": 3.773754835128784,
"learning_rate": 9.994011777553152e-06,
"loss": 0.1151,
"step": 2260
},
{
"epoch": 1.723287151262099,
"grad_norm": 12.578306198120117,
"learning_rate": 9.99357170488286e-06,
"loss": 0.2351,
"step": 2270
},
{
"epoch": 1.7308787246156765,
"grad_norm": 0.12735772132873535,
"learning_rate": 9.993116040412289e-06,
"loss": 0.2368,
"step": 2280
},
{
"epoch": 1.7384702979692541,
"grad_norm": 37.49304962158203,
"learning_rate": 9.9926447855642e-06,
"loss": 0.1451,
"step": 2290
},
{
"epoch": 1.7460618713228317,
"grad_norm": 7.337117671966553,
"learning_rate": 9.992157941810027e-06,
"loss": 0.2029,
"step": 2300
},
{
"epoch": 1.7536534446764092,
"grad_norm": 93.44843292236328,
"learning_rate": 9.991655510669875e-06,
"loss": 0.2177,
"step": 2310
},
{
"epoch": 1.7612450180299866,
"grad_norm": 6.563670635223389,
"learning_rate": 9.991137493712524e-06,
"loss": 0.0768,
"step": 2320
},
{
"epoch": 1.7688365913835642,
"grad_norm": 0.021621128544211388,
"learning_rate": 9.990603892555417e-06,
"loss": 0.1178,
"step": 2330
},
{
"epoch": 1.7764281647371418,
"grad_norm": 0.022252781316637993,
"learning_rate": 9.990054708864655e-06,
"loss": 0.1944,
"step": 2340
},
{
"epoch": 1.7840197380907195,
"grad_norm": 21.766817092895508,
"learning_rate": 9.989489944355e-06,
"loss": 0.355,
"step": 2350
},
{
"epoch": 1.7916113114442969,
"grad_norm": 0.05736351013183594,
"learning_rate": 9.988909600789851e-06,
"loss": 0.1318,
"step": 2360
},
{
"epoch": 1.7992028847978743,
"grad_norm": 44.977779388427734,
"learning_rate": 9.988313679981263e-06,
"loss": 0.0222,
"step": 2370
},
{
"epoch": 1.806794458151452,
"grad_norm": 0.016255084425210953,
"learning_rate": 9.987702183789922e-06,
"loss": 0.1285,
"step": 2380
},
{
"epoch": 1.8143860315050295,
"grad_norm": 0.5945267081260681,
"learning_rate": 9.987075114125148e-06,
"loss": 0.3838,
"step": 2390
},
{
"epoch": 1.821977604858607,
"grad_norm": 0.004704204387962818,
"learning_rate": 9.986432472944887e-06,
"loss": 0.1587,
"step": 2400
},
{
"epoch": 1.8295691782121843,
"grad_norm": 0.07433657348155975,
"learning_rate": 9.985774262255708e-06,
"loss": 0.1604,
"step": 2410
},
{
"epoch": 1.837160751565762,
"grad_norm": 0.08134903013706207,
"learning_rate": 9.985100484112786e-06,
"loss": 0.2395,
"step": 2420
},
{
"epoch": 1.8447523249193396,
"grad_norm": 0.5896629095077515,
"learning_rate": 9.984411140619914e-06,
"loss": 0.0397,
"step": 2430
},
{
"epoch": 1.852343898272917,
"grad_norm": 0.0015955844428390265,
"learning_rate": 9.983706233929477e-06,
"loss": 0.2479,
"step": 2440
},
{
"epoch": 1.8599354716264946,
"grad_norm": 12.32898998260498,
"learning_rate": 9.982985766242458e-06,
"loss": 0.071,
"step": 2450
},
{
"epoch": 1.867527044980072,
"grad_norm": 0.17913532257080078,
"learning_rate": 9.98224973980843e-06,
"loss": 0.0426,
"step": 2460
},
{
"epoch": 1.8751186183336497,
"grad_norm": 0.06611054390668869,
"learning_rate": 9.981498156925539e-06,
"loss": 0.3534,
"step": 2470
},
{
"epoch": 1.8827101916872273,
"grad_norm": 2.170029640197754,
"learning_rate": 9.98073101994051e-06,
"loss": 0.1845,
"step": 2480
},
{
"epoch": 1.8903017650408047,
"grad_norm": 3.257478952407837,
"learning_rate": 9.979948331248633e-06,
"loss": 0.0038,
"step": 2490
},
{
"epoch": 1.8978933383943821,
"grad_norm": 138.6713409423828,
"learning_rate": 9.979150093293753e-06,
"loss": 0.1855,
"step": 2500
},
{
"epoch": 1.9054849117479598,
"grad_norm": 0.7939999103546143,
"learning_rate": 9.978336308568266e-06,
"loss": 0.2101,
"step": 2510
},
{
"epoch": 1.9130764851015374,
"grad_norm": 14.836468696594238,
"learning_rate": 9.977506979613118e-06,
"loss": 0.2692,
"step": 2520
},
{
"epoch": 1.9206680584551148,
"grad_norm": 0.4420275390148163,
"learning_rate": 9.97666210901778e-06,
"loss": 0.0356,
"step": 2530
},
{
"epoch": 1.9282596318086922,
"grad_norm": 4.923569679260254,
"learning_rate": 9.975801699420256e-06,
"loss": 0.1263,
"step": 2540
},
{
"epoch": 1.9358512051622698,
"grad_norm": 0.01419526245445013,
"learning_rate": 9.974925753507066e-06,
"loss": 0.0735,
"step": 2550
},
{
"epoch": 1.9434427785158475,
"grad_norm": 72.68999481201172,
"learning_rate": 9.974034274013242e-06,
"loss": 0.0418,
"step": 2560
},
{
"epoch": 1.951034351869425,
"grad_norm": 0.0027209515683352947,
"learning_rate": 9.973127263722317e-06,
"loss": 0.0042,
"step": 2570
},
{
"epoch": 1.9586259252230025,
"grad_norm": 0.015417971648275852,
"learning_rate": 9.972204725466316e-06,
"loss": 0.2174,
"step": 2580
},
{
"epoch": 1.96621749857658,
"grad_norm": 0.013561515137553215,
"learning_rate": 9.971266662125749e-06,
"loss": 0.0808,
"step": 2590
},
{
"epoch": 1.9738090719301575,
"grad_norm": 72.93014526367188,
"learning_rate": 9.9703130766296e-06,
"loss": 0.2353,
"step": 2600
},
{
"epoch": 1.9814006452837352,
"grad_norm": 0.028727278113365173,
"learning_rate": 9.96934397195532e-06,
"loss": 0.0344,
"step": 2610
},
{
"epoch": 1.9889922186373126,
"grad_norm": 6.5093770027160645,
"learning_rate": 9.96835935112882e-06,
"loss": 0.3215,
"step": 2620
},
{
"epoch": 1.99658379199089,
"grad_norm": 45.58213806152344,
"learning_rate": 9.96735921722445e-06,
"loss": 0.4849,
"step": 2630
},
{
"epoch": 1.9996204213323212,
"eval_f1": 0.9241639816476168,
"eval_loss": 0.15625236928462982,
"eval_precision": 0.9250778152019562,
"eval_recall": 0.9241274658573596,
"eval_runtime": 75.5915,
"eval_samples_per_second": 17.436,
"eval_steps_per_second": 17.436,
"step": 2634
},
{
"epoch": 2.0041753653444676,
"grad_norm": 0.07899657636880875,
"learning_rate": 9.966343573365005e-06,
"loss": 0.0937,
"step": 2640
},
{
"epoch": 2.0117669386980452,
"grad_norm": 1.1364494562149048,
"learning_rate": 9.965312422721705e-06,
"loss": 0.0372,
"step": 2650
},
{
"epoch": 2.019358512051623,
"grad_norm": 0.009463181719183922,
"learning_rate": 9.964265768514189e-06,
"loss": 0.1315,
"step": 2660
},
{
"epoch": 2.0269500854052,
"grad_norm": 152.41160583496094,
"learning_rate": 9.963203614010502e-06,
"loss": 0.1601,
"step": 2670
},
{
"epoch": 2.0345416587587777,
"grad_norm": 12.7033109664917,
"learning_rate": 9.962125962527088e-06,
"loss": 0.1492,
"step": 2680
},
{
"epoch": 2.0421332321123553,
"grad_norm": 0.1103023886680603,
"learning_rate": 9.961032817428779e-06,
"loss": 0.044,
"step": 2690
},
{
"epoch": 2.049724805465933,
"grad_norm": 0.04437507316470146,
"learning_rate": 9.959924182128784e-06,
"loss": 0.2004,
"step": 2700
},
{
"epoch": 2.05731637881951,
"grad_norm": 0.016279350966215134,
"learning_rate": 9.958800060088675e-06,
"loss": 0.0789,
"step": 2710
},
{
"epoch": 2.0649079521730878,
"grad_norm": 0.06195428967475891,
"learning_rate": 9.957660454818385e-06,
"loss": 0.1212,
"step": 2720
},
{
"epoch": 2.0724995255266654,
"grad_norm": 0.07117705792188644,
"learning_rate": 9.956505369876187e-06,
"loss": 0.1124,
"step": 2730
},
{
"epoch": 2.080091098880243,
"grad_norm": 0.0017620900180190802,
"learning_rate": 9.955334808868686e-06,
"loss": 0.2135,
"step": 2740
},
{
"epoch": 2.0876826722338206,
"grad_norm": 0.0784306600689888,
"learning_rate": 9.954148775450816e-06,
"loss": 0.0047,
"step": 2750
},
{
"epoch": 2.095274245587398,
"grad_norm": 0.014996266923844814,
"learning_rate": 9.952947273325815e-06,
"loss": 0.0063,
"step": 2760
},
{
"epoch": 2.1028658189409755,
"grad_norm": 3.2599010467529297,
"learning_rate": 9.951730306245222e-06,
"loss": 0.1602,
"step": 2770
},
{
"epoch": 2.110457392294553,
"grad_norm": 0.016863863915205002,
"learning_rate": 9.950497878008865e-06,
"loss": 0.0317,
"step": 2780
},
{
"epoch": 2.1180489656481307,
"grad_norm": 15.340392112731934,
"learning_rate": 9.949249992464847e-06,
"loss": 0.154,
"step": 2790
},
{
"epoch": 2.125640539001708,
"grad_norm": 4.341642379760742,
"learning_rate": 9.947986653509531e-06,
"loss": 0.0257,
"step": 2800
},
{
"epoch": 2.1332321123552855,
"grad_norm": 1.8507261276245117,
"learning_rate": 9.946707865087538e-06,
"loss": 0.1434,
"step": 2810
},
{
"epoch": 2.140823685708863,
"grad_norm": 0.16088451445102692,
"learning_rate": 9.94541363119172e-06,
"loss": 0.0837,
"step": 2820
},
{
"epoch": 2.148415259062441,
"grad_norm": 0.689831018447876,
"learning_rate": 9.944103955863162e-06,
"loss": 0.4116,
"step": 2830
},
{
"epoch": 2.1560068324160184,
"grad_norm": 1.8963958024978638,
"learning_rate": 9.94277884319116e-06,
"loss": 0.1837,
"step": 2840
},
{
"epoch": 2.1635984057695956,
"grad_norm": 0.024928750470280647,
"learning_rate": 9.941438297313215e-06,
"loss": 0.0743,
"step": 2850
},
{
"epoch": 2.1711899791231732,
"grad_norm": 0.006995880510658026,
"learning_rate": 9.940082322415008e-06,
"loss": 0.0001,
"step": 2860
},
{
"epoch": 2.178781552476751,
"grad_norm": 78.55364227294922,
"learning_rate": 9.938710922730404e-06,
"loss": 0.1252,
"step": 2870
},
{
"epoch": 2.1863731258303285,
"grad_norm": 0.013810686767101288,
"learning_rate": 9.937324102541424e-06,
"loss": 0.0243,
"step": 2880
},
{
"epoch": 2.1939646991839057,
"grad_norm": 0.007164845243096352,
"learning_rate": 9.935921866178242e-06,
"loss": 0.0583,
"step": 2890
},
{
"epoch": 2.2015562725374833,
"grad_norm": 0.0043396539986133575,
"learning_rate": 9.934504218019161e-06,
"loss": 0.0862,
"step": 2900
},
{
"epoch": 2.209147845891061,
"grad_norm": 0.007671877276152372,
"learning_rate": 9.933071162490613e-06,
"loss": 0.0016,
"step": 2910
},
{
"epoch": 2.2167394192446386,
"grad_norm": 327.8991394042969,
"learning_rate": 9.931622704067133e-06,
"loss": 0.1624,
"step": 2920
},
{
"epoch": 2.224330992598216,
"grad_norm": 16.11570167541504,
"learning_rate": 9.93015884727135e-06,
"loss": 0.2645,
"step": 2930
},
{
"epoch": 2.2319225659517934,
"grad_norm": 0.005082719959318638,
"learning_rate": 9.928679596673974e-06,
"loss": 0.0002,
"step": 2940
},
{
"epoch": 2.239514139305371,
"grad_norm": 0.01941937580704689,
"learning_rate": 9.927184956893778e-06,
"loss": 0.0612,
"step": 2950
},
{
"epoch": 2.2471057126589487,
"grad_norm": 19.174551010131836,
"learning_rate": 9.925674932597586e-06,
"loss": 0.2042,
"step": 2960
},
{
"epoch": 2.2546972860125263,
"grad_norm": 21.23321533203125,
"learning_rate": 9.924149528500259e-06,
"loss": 0.0703,
"step": 2970
},
{
"epoch": 2.2622888593661035,
"grad_norm": 0.11990063637495041,
"learning_rate": 9.922608749364684e-06,
"loss": 0.1142,
"step": 2980
},
{
"epoch": 2.269880432719681,
"grad_norm": 0.1152704656124115,
"learning_rate": 9.921052600001746e-06,
"loss": 0.102,
"step": 2990
},
{
"epoch": 2.2774720060732587,
"grad_norm": 36.8327751159668,
"learning_rate": 9.919481085270328e-06,
"loss": 0.0215,
"step": 3000
},
{
"epoch": 2.2850635794268364,
"grad_norm": 0.06316674500703812,
"learning_rate": 9.917894210077285e-06,
"loss": 0.1024,
"step": 3010
},
{
"epoch": 2.292655152780414,
"grad_norm": 0.04541470482945442,
"learning_rate": 9.916291979377436e-06,
"loss": 0.21,
"step": 3020
},
{
"epoch": 2.300246726133991,
"grad_norm": 2.5551743507385254,
"learning_rate": 9.914674398173548e-06,
"loss": 0.0009,
"step": 3030
},
{
"epoch": 2.307838299487569,
"grad_norm": 0.0514085479080677,
"learning_rate": 9.913041471516311e-06,
"loss": 0.0674,
"step": 3040
},
{
"epoch": 2.3154298728411464,
"grad_norm": 0.09069258719682693,
"learning_rate": 9.911393204504339e-06,
"loss": 0.1548,
"step": 3050
},
{
"epoch": 2.3230214461947236,
"grad_norm": 0.0353839211165905,
"learning_rate": 9.909729602284131e-06,
"loss": 0.1214,
"step": 3060
},
{
"epoch": 2.3306130195483012,
"grad_norm": 0.006493726279586554,
"learning_rate": 9.908050670050081e-06,
"loss": 0.0039,
"step": 3070
},
{
"epoch": 2.338204592901879,
"grad_norm": 0.009368511848151684,
"learning_rate": 9.906356413044443e-06,
"loss": 0.0779,
"step": 3080
},
{
"epoch": 2.3457961662554565,
"grad_norm": 0.011731524951756,
"learning_rate": 9.90464683655732e-06,
"loss": 0.1077,
"step": 3090
},
{
"epoch": 2.353387739609034,
"grad_norm": 63.11314392089844,
"learning_rate": 9.902921945926653e-06,
"loss": 0.2824,
"step": 3100
},
{
"epoch": 2.3609793129626113,
"grad_norm": 0.0035196368116885424,
"learning_rate": 9.901181746538196e-06,
"loss": 0.0024,
"step": 3110
},
{
"epoch": 2.368570886316189,
"grad_norm": 0.0374101847410202,
"learning_rate": 9.8994262438255e-06,
"loss": 0.012,
"step": 3120
},
{
"epoch": 2.3761624596697666,
"grad_norm": 16.60328483581543,
"learning_rate": 9.897833211571187e-06,
"loss": 0.466,
"step": 3130
},
{
"epoch": 2.383754033023344,
"grad_norm": 21.628568649291992,
"learning_rate": 9.896048647683e-06,
"loss": 0.1202,
"step": 3140
},
{
"epoch": 2.3913456063769214,
"grad_norm": 15.491986274719238,
"learning_rate": 9.894248796498034e-06,
"loss": 0.056,
"step": 3150
},
{
"epoch": 2.398937179730499,
"grad_norm": 0.009366643615067005,
"learning_rate": 9.892433663636095e-06,
"loss": 0.0003,
"step": 3160
},
{
"epoch": 2.4065287530840767,
"grad_norm": 75.25447082519531,
"learning_rate": 9.890603254764708e-06,
"loss": 0.1785,
"step": 3170
},
{
"epoch": 2.4141203264376543,
"grad_norm": 0.010000905022025108,
"learning_rate": 9.888757575599095e-06,
"loss": 0.1125,
"step": 3180
},
{
"epoch": 2.421711899791232,
"grad_norm": 0.21319662034511566,
"learning_rate": 9.886896631902156e-06,
"loss": 0.0575,
"step": 3190
},
{
"epoch": 2.429303473144809,
"grad_norm": 6.481915473937988,
"learning_rate": 9.885020429484457e-06,
"loss": 0.2689,
"step": 3200
},
{
"epoch": 2.4368950464983867,
"grad_norm": 0.20284566283226013,
"learning_rate": 9.8831289742042e-06,
"loss": 0.149,
"step": 3210
},
{
"epoch": 2.4444866198519644,
"grad_norm": 1.3910574913024902,
"learning_rate": 9.881222271967224e-06,
"loss": 0.0142,
"step": 3220
},
{
"epoch": 2.452078193205542,
"grad_norm": 0.09682253748178482,
"learning_rate": 9.879300328726958e-06,
"loss": 0.0021,
"step": 3230
},
{
"epoch": 2.459669766559119,
"grad_norm": 0.005042471457272768,
"learning_rate": 9.877363150484434e-06,
"loss": 0.2168,
"step": 3240
},
{
"epoch": 2.467261339912697,
"grad_norm": 64.47718811035156,
"learning_rate": 9.875410743288246e-06,
"loss": 0.1994,
"step": 3250
},
{
"epoch": 2.4748529132662744,
"grad_norm": 0.2548009753227234,
"learning_rate": 9.873443113234541e-06,
"loss": 0.2271,
"step": 3260
},
{
"epoch": 2.482444486619852,
"grad_norm": 0.008805714547634125,
"learning_rate": 9.871460266466996e-06,
"loss": 0.0827,
"step": 3270
},
{
"epoch": 2.4900360599734297,
"grad_norm": 0.05888598784804344,
"learning_rate": 9.8694622091768e-06,
"loss": 0.054,
"step": 3280
},
{
"epoch": 2.497627633327007,
"grad_norm": 0.004817333538085222,
"learning_rate": 9.867448947602637e-06,
"loss": 0.105,
"step": 3290
},
{
"epoch": 2.5052192066805845,
"grad_norm": 0.04850906506180763,
"learning_rate": 9.865420488030664e-06,
"loss": 0.2363,
"step": 3300
},
{
"epoch": 2.512810780034162,
"grad_norm": 0.14938922226428986,
"learning_rate": 9.86337683679449e-06,
"loss": 0.1593,
"step": 3310
},
{
"epoch": 2.5204023533877398,
"grad_norm": 18.9013729095459,
"learning_rate": 9.861318000275158e-06,
"loss": 0.2351,
"step": 3320
},
{
"epoch": 2.527993926741317,
"grad_norm": 0.025823410600423813,
"learning_rate": 9.85924398490113e-06,
"loss": 0.0022,
"step": 3330
},
{
"epoch": 2.5355855000948946,
"grad_norm": 28.33924674987793,
"learning_rate": 9.857154797148255e-06,
"loss": 0.2312,
"step": 3340
},
{
"epoch": 2.543177073448472,
"grad_norm": 0.001974069746211171,
"learning_rate": 9.855050443539761e-06,
"loss": 0.0002,
"step": 3350
},
{
"epoch": 2.55076864680205,
"grad_norm": 21.997047424316406,
"learning_rate": 9.852930930646228e-06,
"loss": 0.1257,
"step": 3360
},
{
"epoch": 2.5583602201556275,
"grad_norm": 0.48950299620628357,
"learning_rate": 9.850796265085567e-06,
"loss": 0.0062,
"step": 3370
},
{
"epoch": 2.5659517935092047,
"grad_norm": 8.470258712768555,
"learning_rate": 9.848646453523005e-06,
"loss": 0.0585,
"step": 3380
},
{
"epoch": 2.5735433668627823,
"grad_norm": 0.11571002751588821,
"learning_rate": 9.846481502671056e-06,
"loss": 0.0329,
"step": 3390
},
{
"epoch": 2.58113494021636,
"grad_norm": 11.877908706665039,
"learning_rate": 9.844301419289511e-06,
"loss": 0.2921,
"step": 3400
},
{
"epoch": 2.588726513569937,
"grad_norm": 36.33771896362305,
"learning_rate": 9.842106210185403e-06,
"loss": 0.2223,
"step": 3410
},
{
"epoch": 2.5963180869235147,
"grad_norm": 2.979523181915283,
"learning_rate": 9.839895882212997e-06,
"loss": 0.0653,
"step": 3420
},
{
"epoch": 2.6039096602770924,
"grad_norm": 0.013308779336512089,
"learning_rate": 9.837670442273768e-06,
"loss": 0.0735,
"step": 3430
},
{
"epoch": 2.61150123363067,
"grad_norm": 111.11514282226562,
"learning_rate": 9.835429897316367e-06,
"loss": 0.1495,
"step": 3440
},
{
"epoch": 2.6190928069842476,
"grad_norm": 0.007320565637201071,
"learning_rate": 9.833174254336618e-06,
"loss": 0.3018,
"step": 3450
},
{
"epoch": 2.6266843803378253,
"grad_norm": 0.010831023566424847,
"learning_rate": 9.830903520377482e-06,
"loss": 0.0203,
"step": 3460
},
{
"epoch": 2.6342759536914024,
"grad_norm": 18.389625549316406,
"learning_rate": 9.82861770252904e-06,
"loss": 0.1973,
"step": 3470
},
{
"epoch": 2.64186752704498,
"grad_norm": 12.364988327026367,
"learning_rate": 9.826316807928468e-06,
"loss": 0.0988,
"step": 3480
},
{
"epoch": 2.6494591003985577,
"grad_norm": 0.0008839545771479607,
"learning_rate": 9.824000843760028e-06,
"loss": 0.0552,
"step": 3490
},
{
"epoch": 2.657050673752135,
"grad_norm": 0.028787225484848022,
"learning_rate": 9.821669817255021e-06,
"loss": 0.1918,
"step": 3500
},
{
"epoch": 2.6646422471057125,
"grad_norm": 0.007524173706769943,
"learning_rate": 9.819323735691787e-06,
"loss": 0.0056,
"step": 3510
},
{
"epoch": 2.67223382045929,
"grad_norm": 7.9602837562561035,
"learning_rate": 9.816962606395668e-06,
"loss": 0.1273,
"step": 3520
},
{
"epoch": 2.6798253938128678,
"grad_norm": 15.868315696716309,
"learning_rate": 9.814586436738998e-06,
"loss": 0.0943,
"step": 3530
},
{
"epoch": 2.6874169671664454,
"grad_norm": 0.2785890996456146,
"learning_rate": 9.812195234141064e-06,
"loss": 0.1291,
"step": 3540
},
{
"epoch": 2.695008540520023,
"grad_norm": 63.62078857421875,
"learning_rate": 9.809789006068097e-06,
"loss": 0.0672,
"step": 3550
},
{
"epoch": 2.7026001138736,
"grad_norm": 2.8807220458984375,
"learning_rate": 9.807367760033245e-06,
"loss": 0.217,
"step": 3560
},
{
"epoch": 2.710191687227178,
"grad_norm": 36.00885009765625,
"learning_rate": 9.80493150359654e-06,
"loss": 0.1016,
"step": 3570
},
{
"epoch": 2.7177832605807555,
"grad_norm": 0.021623503416776657,
"learning_rate": 9.80248024436489e-06,
"loss": 0.2195,
"step": 3580
},
{
"epoch": 2.7253748339343327,
"grad_norm": 0.03640507906675339,
"learning_rate": 9.800013989992042e-06,
"loss": 0.0001,
"step": 3590
},
{
"epoch": 2.7329664072879103,
"grad_norm": 27.120119094848633,
"learning_rate": 9.797532748178566e-06,
"loss": 0.4964,
"step": 3600
},
{
"epoch": 2.740557980641488,
"grad_norm": 0.08877989649772644,
"learning_rate": 9.795036526671828e-06,
"loss": 0.0498,
"step": 3610
},
{
"epoch": 2.7481495539950656,
"grad_norm": 0.0727711170911789,
"learning_rate": 9.792525333265965e-06,
"loss": 0.1452,
"step": 3620
},
{
"epoch": 2.755741127348643,
"grad_norm": 0.21834716200828552,
"learning_rate": 9.789999175801866e-06,
"loss": 0.1315,
"step": 3630
},
{
"epoch": 2.763332700702221,
"grad_norm": 3.933009147644043,
"learning_rate": 9.787458062167135e-06,
"loss": 0.0726,
"step": 3640
},
{
"epoch": 2.770924274055798,
"grad_norm": 0.00495730759575963,
"learning_rate": 9.784902000296084e-06,
"loss": 0.0092,
"step": 3650
},
{
"epoch": 2.7785158474093756,
"grad_norm": 0.06244872510433197,
"learning_rate": 9.782330998169695e-06,
"loss": 0.2204,
"step": 3660
},
{
"epoch": 2.7861074207629533,
"grad_norm": 0.032471269369125366,
"learning_rate": 9.779745063815598e-06,
"loss": 0.0887,
"step": 3670
},
{
"epoch": 2.7936989941165304,
"grad_norm": 0.0014243993209674954,
"learning_rate": 9.777144205308049e-06,
"loss": 0.1105,
"step": 3680
},
{
"epoch": 2.801290567470108,
"grad_norm": 69.43852233886719,
"learning_rate": 9.774528430767902e-06,
"loss": 0.0603,
"step": 3690
},
{
"epoch": 2.8088821408236857,
"grad_norm": 0.06080542132258415,
"learning_rate": 9.771897748362583e-06,
"loss": 0.0163,
"step": 3700
},
{
"epoch": 2.8164737141772633,
"grad_norm": 0.09897174686193466,
"learning_rate": 9.769252166306066e-06,
"loss": 0.1167,
"step": 3710
},
{
"epoch": 2.824065287530841,
"grad_norm": 0.20604291558265686,
"learning_rate": 9.766591692858854e-06,
"loss": 0.0706,
"step": 3720
},
{
"epoch": 2.831656860884418,
"grad_norm": 32.105499267578125,
"learning_rate": 9.763916336327935e-06,
"loss": 0.5321,
"step": 3730
},
{
"epoch": 2.8392484342379958,
"grad_norm": 0.00609110202640295,
"learning_rate": 9.761226105066778e-06,
"loss": 0.0794,
"step": 3740
},
{
"epoch": 2.8468400075915734,
"grad_norm": 0.14252524077892303,
"learning_rate": 9.75852100747529e-06,
"loss": 0.1037,
"step": 3750
},
{
"epoch": 2.854431580945151,
"grad_norm": 0.0007404695497825742,
"learning_rate": 9.7558010519998e-06,
"loss": 0.0552,
"step": 3760
},
{
"epoch": 2.8620231542987282,
"grad_norm": 0.007310529239475727,
"learning_rate": 9.753066247133025e-06,
"loss": 0.009,
"step": 3770
},
{
"epoch": 2.869614727652306,
"grad_norm": 88.26655578613281,
"learning_rate": 9.750316601414051e-06,
"loss": 0.1008,
"step": 3780
},
{
"epoch": 2.8772063010058835,
"grad_norm": 0.01418048795312643,
"learning_rate": 9.7475521234283e-06,
"loss": 0.0262,
"step": 3790
},
{
"epoch": 2.884797874359461,
"grad_norm": 0.06487419456243515,
"learning_rate": 9.744772821807509e-06,
"loss": 0.1206,
"step": 3800
},
{
"epoch": 2.8923894477130387,
"grad_norm": 0.0070535228587687016,
"learning_rate": 9.741978705229697e-06,
"loss": 0.0897,
"step": 3810
},
{
"epoch": 2.899981021066616,
"grad_norm": 1.5489246845245361,
"learning_rate": 9.739169782419143e-06,
"loss": 0.0008,
"step": 3820
},
{
"epoch": 2.9075725944201936,
"grad_norm": 0.001165062771178782,
"learning_rate": 9.736346062146356e-06,
"loss": 0.0239,
"step": 3830
},
{
"epoch": 2.915164167773771,
"grad_norm": 0.0013667664024978876,
"learning_rate": 9.733507553228045e-06,
"loss": 0.0017,
"step": 3840
},
{
"epoch": 2.9227557411273484,
"grad_norm": 0.004272387828677893,
"learning_rate": 9.7306542645271e-06,
"loss": 0.1874,
"step": 3850
},
{
"epoch": 2.930347314480926,
"grad_norm": 0.032470703125,
"learning_rate": 9.727786204952554e-06,
"loss": 0.0128,
"step": 3860
},
{
"epoch": 2.9379388878345036,
"grad_norm": 0.010683764703571796,
"learning_rate": 9.724903383459566e-06,
"loss": 0.064,
"step": 3870
},
{
"epoch": 2.9455304611880813,
"grad_norm": 11.981929779052734,
"learning_rate": 9.722005809049382e-06,
"loss": 0.2962,
"step": 3880
},
{
"epoch": 2.953122034541659,
"grad_norm": 7.638548374176025,
"learning_rate": 9.719093490769315e-06,
"loss": 0.2084,
"step": 3890
},
{
"epoch": 2.9607136078952365,
"grad_norm": 0.0027020114939659834,
"learning_rate": 9.71616643771271e-06,
"loss": 0.0521,
"step": 3900
},
{
"epoch": 2.9683051812488137,
"grad_norm": 0.041696127504110336,
"learning_rate": 9.713224659018927e-06,
"loss": 0.1488,
"step": 3910
},
{
"epoch": 2.9758967546023913,
"grad_norm": 276.02947998046875,
"learning_rate": 9.710268163873298e-06,
"loss": 0.1649,
"step": 3920
},
{
"epoch": 2.983488327955969,
"grad_norm": 4.512789726257324,
"learning_rate": 9.707296961507107e-06,
"loss": 0.0364,
"step": 3930
},
{
"epoch": 2.991079901309546,
"grad_norm": 0.07038887590169907,
"learning_rate": 9.70431106119756e-06,
"loss": 0.046,
"step": 3940
},
{
"epoch": 2.998671474663124,
"grad_norm": 52.16018295288086,
"learning_rate": 9.701310472267757e-06,
"loss": 0.1439,
"step": 3950
},
{
"epoch": 2.999430631998482,
"eval_f1": 0.9468203897167411,
"eval_loss": 0.17982631921768188,
"eval_precision": 0.9451027269774426,
"eval_recall": 0.9468892261001517,
"eval_runtime": 75.734,
"eval_samples_per_second": 17.403,
"eval_steps_per_second": 17.403,
"step": 3951
},
{
"epoch": 3.0062630480167014,
"grad_norm": 0.003164840629324317,
"learning_rate": 9.69829520408666e-06,
"loss": 0.0687,
"step": 3960
},
{
"epoch": 3.013854621370279,
"grad_norm": 0.005421197507530451,
"learning_rate": 9.695265266069066e-06,
"loss": 0.1768,
"step": 3970
},
{
"epoch": 3.0214461947238567,
"grad_norm": 0.07668659836053848,
"learning_rate": 9.692220667675572e-06,
"loss": 0.0092,
"step": 3980
},
{
"epoch": 3.029037768077434,
"grad_norm": 0.0020935048814862967,
"learning_rate": 9.689161418412557e-06,
"loss": 0.2435,
"step": 3990
},
{
"epoch": 3.0366293414310115,
"grad_norm": 0.012631943449378014,
"learning_rate": 9.68608752783214e-06,
"loss": 0.0858,
"step": 4000
},
{
"epoch": 3.044220914784589,
"grad_norm": 0.005341747775673866,
"learning_rate": 9.682999005532161e-06,
"loss": 0.0094,
"step": 4010
},
{
"epoch": 3.0518124881381667,
"grad_norm": 0.02143806405365467,
"learning_rate": 9.67989586115614e-06,
"loss": 0.0031,
"step": 4020
},
{
"epoch": 3.0594040614917444,
"grad_norm": 13.902883529663086,
"learning_rate": 9.67677810439326e-06,
"loss": 0.0965,
"step": 4030
},
{
"epoch": 3.0669956348453216,
"grad_norm": 0.20893624424934387,
"learning_rate": 9.67364574497832e-06,
"loss": 0.107,
"step": 4040
},
{
"epoch": 3.074587208198899,
"grad_norm": 0.18238410353660583,
"learning_rate": 9.67049879269172e-06,
"loss": 0.001,
"step": 4050
},
{
"epoch": 3.082178781552477,
"grad_norm": 0.022665822878479958,
"learning_rate": 9.667337257359425e-06,
"loss": 0.1673,
"step": 4060
},
{
"epoch": 3.0897703549060545,
"grad_norm": 10.807044982910156,
"learning_rate": 9.664161148852932e-06,
"loss": 0.0674,
"step": 4070
},
{
"epoch": 3.0973619282596316,
"grad_norm": 0.0026043581310659647,
"learning_rate": 9.660970477089238e-06,
"loss": 0.0097,
"step": 4080
},
{
"epoch": 3.1049535016132093,
"grad_norm": 18.194334030151367,
"learning_rate": 9.657765252030815e-06,
"loss": 0.0064,
"step": 4090
},
{
"epoch": 3.112545074966787,
"grad_norm": 12.572392463684082,
"learning_rate": 9.654545483685578e-06,
"loss": 0.1343,
"step": 4100
},
{
"epoch": 3.1201366483203645,
"grad_norm": 0.0007624260615557432,
"learning_rate": 9.651311182106848e-06,
"loss": 0.0325,
"step": 4110
},
{
"epoch": 3.1277282216739417,
"grad_norm": 0.018368422985076904,
"learning_rate": 9.648062357393325e-06,
"loss": 0.0005,
"step": 4120
},
{
"epoch": 3.1353197950275193,
"grad_norm": 78.8929443359375,
"learning_rate": 9.644799019689056e-06,
"loss": 0.054,
"step": 4130
},
{
"epoch": 3.142911368381097,
"grad_norm": 0.010049775242805481,
"learning_rate": 9.641521179183403e-06,
"loss": 0.0157,
"step": 4140
},
{
"epoch": 3.1505029417346746,
"grad_norm": 91.76640319824219,
"learning_rate": 9.638228846111011e-06,
"loss": 0.1893,
"step": 4150
},
{
"epoch": 3.1580945150882522,
"grad_norm": 0.30123358964920044,
"learning_rate": 9.634922030751777e-06,
"loss": 0.2819,
"step": 4160
},
{
"epoch": 3.1656860884418294,
"grad_norm": 32.838623046875,
"learning_rate": 9.631600743430817e-06,
"loss": 0.2494,
"step": 4170
},
{
"epoch": 3.173277661795407,
"grad_norm": 0.1474120020866394,
"learning_rate": 9.628264994518431e-06,
"loss": 0.0401,
"step": 4180
},
{
"epoch": 3.1808692351489847,
"grad_norm": 0.16810506582260132,
"learning_rate": 9.624914794430078e-06,
"loss": 0.0668,
"step": 4190
},
{
"epoch": 3.1884608085025623,
"grad_norm": 1.5835288763046265,
"learning_rate": 9.621550153626338e-06,
"loss": 0.1177,
"step": 4200
},
{
"epoch": 3.1960523818561395,
"grad_norm": 0.00022748277115169913,
"learning_rate": 9.618171082612875e-06,
"loss": 0.006,
"step": 4210
},
{
"epoch": 3.203643955209717,
"grad_norm": 0.011720534414052963,
"learning_rate": 9.614777591940419e-06,
"loss": 0.0547,
"step": 4220
},
{
"epoch": 3.2112355285632947,
"grad_norm": 16.759693145751953,
"learning_rate": 9.611369692204712e-06,
"loss": 0.0687,
"step": 4230
},
{
"epoch": 3.2188271019168724,
"grad_norm": 13.746438026428223,
"learning_rate": 9.6079473940465e-06,
"loss": 0.1731,
"step": 4240
},
{
"epoch": 3.22641867527045,
"grad_norm": 1.0661725997924805,
"learning_rate": 9.604510708151472e-06,
"loss": 0.0012,
"step": 4250
},
{
"epoch": 3.234010248624027,
"grad_norm": 0.0051275817677378654,
"learning_rate": 9.601059645250253e-06,
"loss": 0.1559,
"step": 4260
},
{
"epoch": 3.241601821977605,
"grad_norm": 0.03845924511551857,
"learning_rate": 9.59759421611835e-06,
"loss": 0.0414,
"step": 4270
},
{
"epoch": 3.2491933953311825,
"grad_norm": 0.2744313180446625,
"learning_rate": 9.594114431576133e-06,
"loss": 0.2521,
"step": 4280
},
{
"epoch": 3.25678496868476,
"grad_norm": 0.06969039887189865,
"learning_rate": 9.590620302488792e-06,
"loss": 0.1007,
"step": 4290
},
{
"epoch": 3.2643765420383373,
"grad_norm": 0.044375017285346985,
"learning_rate": 9.587111839766303e-06,
"loss": 0.1706,
"step": 4300
},
{
"epoch": 3.271968115391915,
"grad_norm": 0.008467442356050014,
"learning_rate": 9.583589054363402e-06,
"loss": 0.0518,
"step": 4310
},
{
"epoch": 3.2795596887454925,
"grad_norm": 0.006757930386811495,
"learning_rate": 9.580051957279545e-06,
"loss": 0.1301,
"step": 4320
},
{
"epoch": 3.28715126209907,
"grad_norm": 0.22480565309524536,
"learning_rate": 9.57650055955887e-06,
"loss": 0.2225,
"step": 4330
},
{
"epoch": 3.294742835452648,
"grad_norm": 0.005938298534601927,
"learning_rate": 9.572934872290175e-06,
"loss": 0.1615,
"step": 4340
},
{
"epoch": 3.302334408806225,
"grad_norm": 0.031019240617752075,
"learning_rate": 9.569354906606864e-06,
"loss": 0.0292,
"step": 4350
},
{
"epoch": 3.3099259821598026,
"grad_norm": 0.058189138770103455,
"learning_rate": 9.565760673686936e-06,
"loss": 0.1437,
"step": 4360
},
{
"epoch": 3.3175175555133802,
"grad_norm": 18.81794548034668,
"learning_rate": 9.56215218475293e-06,
"loss": 0.1732,
"step": 4370
},
{
"epoch": 3.325109128866958,
"grad_norm": 0.037775713950395584,
"learning_rate": 9.558529451071896e-06,
"loss": 0.0048,
"step": 4380
},
{
"epoch": 3.332700702220535,
"grad_norm": 0.014422253705561161,
"learning_rate": 9.55489248395537e-06,
"loss": 0.0021,
"step": 4390
},
{
"epoch": 3.3402922755741127,
"grad_norm": 30.743995666503906,
"learning_rate": 9.551241294759322e-06,
"loss": 0.238,
"step": 4400
},
{
"epoch": 3.3478838489276903,
"grad_norm": 1.6870224475860596,
"learning_rate": 9.547575894884132e-06,
"loss": 0.09,
"step": 4410
},
{
"epoch": 3.355475422281268,
"grad_norm": 0.03549875691533089,
"learning_rate": 9.54389629577455e-06,
"loss": 0.163,
"step": 4420
},
{
"epoch": 3.3630669956348456,
"grad_norm": 0.12179459631443024,
"learning_rate": 9.540202508919663e-06,
"loss": 0.0025,
"step": 4430
},
{
"epoch": 3.3706585689884228,
"grad_norm": 0.000569705618545413,
"learning_rate": 9.536494545852854e-06,
"loss": 0.0433,
"step": 4440
},
{
"epoch": 3.3782501423420004,
"grad_norm": 0.0051111155189573765,
"learning_rate": 9.532772418151777e-06,
"loss": 0.1015,
"step": 4450
},
{
"epoch": 3.385841715695578,
"grad_norm": 0.0955556184053421,
"learning_rate": 9.529036137438304e-06,
"loss": 0.2303,
"step": 4460
},
{
"epoch": 3.393433289049155,
"grad_norm": 0.02819570153951645,
"learning_rate": 9.5252857153785e-06,
"loss": 0.0003,
"step": 4470
},
{
"epoch": 3.401024862402733,
"grad_norm": 0.005423153750598431,
"learning_rate": 9.521521163682593e-06,
"loss": 0.0102,
"step": 4480
},
{
"epoch": 3.4086164357563105,
"grad_norm": 0.8613097667694092,
"learning_rate": 9.517742494104918e-06,
"loss": 0.0005,
"step": 4490
},
{
"epoch": 3.416208009109888,
"grad_norm": 0.2508643567562103,
"learning_rate": 9.513949718443898e-06,
"loss": 0.0711,
"step": 4500
},
{
"epoch": 3.4237995824634657,
"grad_norm": 0.026635829359292984,
"learning_rate": 9.510142848541998e-06,
"loss": 0.0596,
"step": 4510
},
{
"epoch": 3.431391155817043,
"grad_norm": 0.0043787783943116665,
"learning_rate": 9.50632189628569e-06,
"loss": 0.3671,
"step": 4520
},
{
"epoch": 3.4389827291706205,
"grad_norm": 0.05850038304924965,
"learning_rate": 9.502486873605419e-06,
"loss": 0.1132,
"step": 4530
},
{
"epoch": 3.446574302524198,
"grad_norm": 157.52146911621094,
"learning_rate": 9.49863779247556e-06,
"loss": 0.1559,
"step": 4540
},
{
"epoch": 3.454165875877776,
"grad_norm": 0.02441789209842682,
"learning_rate": 9.494774664914385e-06,
"loss": 0.0658,
"step": 4550
},
{
"epoch": 3.461757449231353,
"grad_norm": 1.3454347848892212,
"learning_rate": 9.490897502984028e-06,
"loss": 0.0128,
"step": 4560
},
{
"epoch": 3.4693490225849306,
"grad_norm": 0.012022917158901691,
"learning_rate": 9.487006318790435e-06,
"loss": 0.0266,
"step": 4570
},
{
"epoch": 3.4769405959385082,
"grad_norm": 0.01288307923823595,
"learning_rate": 9.483101124483345e-06,
"loss": 0.0001,
"step": 4580
},
{
"epoch": 3.484532169292086,
"grad_norm": 26.168624877929688,
"learning_rate": 9.479181932256232e-06,
"loss": 0.0258,
"step": 4590
},
{
"epoch": 3.4921237426456635,
"grad_norm": 0.004901974927634001,
"learning_rate": 9.475248754346282e-06,
"loss": 0.1046,
"step": 4600
},
{
"epoch": 3.4997153159992407,
"grad_norm": 0.001919193658977747,
"learning_rate": 9.471301603034353e-06,
"loss": 0.0766,
"step": 4610
},
{
"epoch": 3.5073068893528183,
"grad_norm": 0.030080076307058334,
"learning_rate": 9.467340490644923e-06,
"loss": 0.0022,
"step": 4620
},
{
"epoch": 3.514898462706396,
"grad_norm": 0.041573066264390945,
"learning_rate": 9.463365429546073e-06,
"loss": 0.0357,
"step": 4630
},
{
"epoch": 3.5224900360599736,
"grad_norm": 30.251873016357422,
"learning_rate": 9.459376432149429e-06,
"loss": 0.0533,
"step": 4640
},
{
"epoch": 3.5300816094135508,
"grad_norm": 58.92287826538086,
"learning_rate": 9.455373510910135e-06,
"loss": 0.1241,
"step": 4650
},
{
"epoch": 3.5376731827671284,
"grad_norm": 0.015299913473427296,
"learning_rate": 9.45135667832681e-06,
"loss": 0.0672,
"step": 4660
},
{
"epoch": 3.545264756120706,
"grad_norm": 0.024773746728897095,
"learning_rate": 9.447325946941509e-06,
"loss": 0.0002,
"step": 4670
},
{
"epoch": 3.5528563294742836,
"grad_norm": 0.0013335061958059669,
"learning_rate": 9.443281329339682e-06,
"loss": 0.0002,
"step": 4680
},
{
"epoch": 3.5604479028278613,
"grad_norm": 0.003542415564879775,
"learning_rate": 9.439222838150141e-06,
"loss": 0.0053,
"step": 4690
},
{
"epoch": 3.5680394761814385,
"grad_norm": 0.004198325797915459,
"learning_rate": 9.435150486045019e-06,
"loss": 0.0021,
"step": 4700
},
{
"epoch": 3.575631049535016,
"grad_norm": 0.012465923093259335,
"learning_rate": 9.431064285739717e-06,
"loss": 0.391,
"step": 4710
},
{
"epoch": 3.5832226228885937,
"grad_norm": 19.51753044128418,
"learning_rate": 9.426964249992885e-06,
"loss": 0.0163,
"step": 4720
},
{
"epoch": 3.5908141962421714,
"grad_norm": 15.74682903289795,
"learning_rate": 9.42285039160637e-06,
"loss": 0.1393,
"step": 4730
},
{
"epoch": 3.5984057695957485,
"grad_norm": 0.001853258814662695,
"learning_rate": 9.418722723425179e-06,
"loss": 0.1333,
"step": 4740
},
{
"epoch": 3.605997342949326,
"grad_norm": 0.00429703202098608,
"learning_rate": 9.414581258337433e-06,
"loss": 0.041,
"step": 4750
},
{
"epoch": 3.613588916302904,
"grad_norm": 0.019961683079600334,
"learning_rate": 9.410426009274343e-06,
"loss": 0.0041,
"step": 4760
},
{
"epoch": 3.6211804896564814,
"grad_norm": 0.003665096592158079,
"learning_rate": 9.406256989210146e-06,
"loss": 0.1252,
"step": 4770
},
{
"epoch": 3.628772063010059,
"grad_norm": 59.87676239013672,
"learning_rate": 9.402074211162086e-06,
"loss": 0.2175,
"step": 4780
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.0013629102613776922,
"learning_rate": 9.397877688190362e-06,
"loss": 0.026,
"step": 4790
},
{
"epoch": 3.643955209717214,
"grad_norm": 0.004092271439731121,
"learning_rate": 9.39366743339809e-06,
"loss": 0.0061,
"step": 4800
},
{
"epoch": 3.6515467830707915,
"grad_norm": 0.06597864627838135,
"learning_rate": 9.38944345993126e-06,
"loss": 0.0974,
"step": 4810
},
{
"epoch": 3.6591383564243687,
"grad_norm": 0.0014479252276942134,
"learning_rate": 9.3852057809787e-06,
"loss": 0.1248,
"step": 4820
},
{
"epoch": 3.6667299297779463,
"grad_norm": 0.0007850687834434211,
"learning_rate": 9.380954409772029e-06,
"loss": 0.0674,
"step": 4830
},
{
"epoch": 3.674321503131524,
"grad_norm": 0.009199988096952438,
"learning_rate": 9.376689359585623e-06,
"loss": 0.0707,
"step": 4840
},
{
"epoch": 3.6819130764851016,
"grad_norm": 0.001353310770355165,
"learning_rate": 9.37241064373656e-06,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 3.689504649838679,
"grad_norm": 0.0004105101979803294,
"learning_rate": 9.368118275584596e-06,
"loss": 0.0161,
"step": 4860
},
{
"epoch": 3.697096223192257,
"grad_norm": 0.005007717292755842,
"learning_rate": 9.36381226853211e-06,
"loss": 0.0854,
"step": 4870
},
{
"epoch": 3.704687796545834,
"grad_norm": 0.001610257662832737,
"learning_rate": 9.359492636024067e-06,
"loss": 0.0002,
"step": 4880
},
{
"epoch": 3.7122793698994117,
"grad_norm": 0.0029359892942011356,
"learning_rate": 9.35515939154798e-06,
"loss": 0.0001,
"step": 4890
},
{
"epoch": 3.7198709432529893,
"grad_norm": 0.016431191936135292,
"learning_rate": 9.350812548633862e-06,
"loss": 0.0407,
"step": 4900
},
{
"epoch": 3.7274625166065665,
"grad_norm": 0.00021083364845253527,
"learning_rate": 9.346452120854176e-06,
"loss": 0.0001,
"step": 4910
},
{
"epoch": 3.735054089960144,
"grad_norm": 0.0014973161742091179,
"learning_rate": 9.342078121823817e-06,
"loss": 0.2248,
"step": 4920
},
{
"epoch": 3.7426456633137217,
"grad_norm": 0.01354212500154972,
"learning_rate": 9.337690565200042e-06,
"loss": 0.07,
"step": 4930
},
{
"epoch": 3.7502372366672994,
"grad_norm": 0.07265155762434006,
"learning_rate": 9.333289464682452e-06,
"loss": 0.0486,
"step": 4940
},
{
"epoch": 3.757828810020877,
"grad_norm": 0.0004681596765294671,
"learning_rate": 9.328874834012925e-06,
"loss": 0.0063,
"step": 4950
},
{
"epoch": 3.7654203833744546,
"grad_norm": 0.01314933318644762,
"learning_rate": 9.324446686975592e-06,
"loss": 0.0853,
"step": 4960
},
{
"epoch": 3.773011956728032,
"grad_norm": 0.00873385276645422,
"learning_rate": 9.320005037396787e-06,
"loss": 0.0936,
"step": 4970
},
{
"epoch": 3.7806035300816094,
"grad_norm": 10.59278678894043,
"learning_rate": 9.315549899145001e-06,
"loss": 0.1606,
"step": 4980
},
{
"epoch": 3.788195103435187,
"grad_norm": 0.0031807045452296734,
"learning_rate": 9.311081286130846e-06,
"loss": 0.1216,
"step": 4990
},
{
"epoch": 3.7957866767887642,
"grad_norm": 0.15921778976917267,
"learning_rate": 9.306599212307001e-06,
"loss": 0.1834,
"step": 5000
},
{
"epoch": 3.803378250142342,
"grad_norm": 0.24746917188167572,
"learning_rate": 9.302103691668182e-06,
"loss": 0.0025,
"step": 5010
},
{
"epoch": 3.8109698234959195,
"grad_norm": 23.347986221313477,
"learning_rate": 9.297594738251085e-06,
"loss": 0.155,
"step": 5020
},
{
"epoch": 3.818561396849497,
"grad_norm": 10.753530502319336,
"learning_rate": 9.293072366134353e-06,
"loss": 0.1938,
"step": 5030
},
{
"epoch": 3.8261529702030748,
"grad_norm": 11.585359573364258,
"learning_rate": 9.288536589438523e-06,
"loss": 0.0768,
"step": 5040
},
{
"epoch": 3.833744543556652,
"grad_norm": 0.035775743424892426,
"learning_rate": 9.283987422325988e-06,
"loss": 0.0124,
"step": 5050
},
{
"epoch": 3.8413361169102296,
"grad_norm": 0.008631790988147259,
"learning_rate": 9.279424879000948e-06,
"loss": 0.0634,
"step": 5060
},
{
"epoch": 3.848927690263807,
"grad_norm": 8.152615547180176,
"learning_rate": 9.274848973709378e-06,
"loss": 0.0008,
"step": 5070
},
{
"epoch": 3.856519263617385,
"grad_norm": 0.00742849987000227,
"learning_rate": 9.270259720738962e-06,
"loss": 0.0023,
"step": 5080
},
{
"epoch": 3.864110836970962,
"grad_norm": 0.00474806921556592,
"learning_rate": 9.265657134419068e-06,
"loss": 0.0822,
"step": 5090
},
{
"epoch": 3.8717024103245397,
"grad_norm": 0.04680832102894783,
"learning_rate": 9.261041229120693e-06,
"loss": 0.4435,
"step": 5100
},
{
"epoch": 3.8792939836781173,
"grad_norm": 0.05589527264237404,
"learning_rate": 9.25641201925642e-06,
"loss": 0.0161,
"step": 5110
},
{
"epoch": 3.886885557031695,
"grad_norm": 0.0864788219332695,
"learning_rate": 9.251769519280377e-06,
"loss": 0.0042,
"step": 5120
},
{
"epoch": 3.8944771303852725,
"grad_norm": 0.0046981326304376125,
"learning_rate": 9.247113743688188e-06,
"loss": 0.0202,
"step": 5130
},
{
"epoch": 3.9020687037388497,
"grad_norm": 0.008091968484222889,
"learning_rate": 9.242444707016924e-06,
"loss": 0.0255,
"step": 5140
},
{
"epoch": 3.9096602770924274,
"grad_norm": 0.016733279451727867,
"learning_rate": 9.237762423845067e-06,
"loss": 0.0609,
"step": 5150
},
{
"epoch": 3.917251850446005,
"grad_norm": 110.93751525878906,
"learning_rate": 9.233066908792459e-06,
"loss": 0.0854,
"step": 5160
},
{
"epoch": 3.9248434237995826,
"grad_norm": 0.0014931544428691268,
"learning_rate": 9.228358176520256e-06,
"loss": 0.5116,
"step": 5170
},
{
"epoch": 3.93243499715316,
"grad_norm": 0.013354528695344925,
"learning_rate": 9.22363624173088e-06,
"loss": 0.1488,
"step": 5180
},
{
"epoch": 3.9400265705067374,
"grad_norm": 0.00550916837528348,
"learning_rate": 9.218901119167983e-06,
"loss": 0.3537,
"step": 5190
},
{
"epoch": 3.947618143860315,
"grad_norm": 29.100811004638672,
"learning_rate": 9.214152823616385e-06,
"loss": 0.2662,
"step": 5200
},
{
"epoch": 3.9552097172138927,
"grad_norm": 0.0014990021008998156,
"learning_rate": 9.209391369902048e-06,
"loss": 0.2909,
"step": 5210
},
{
"epoch": 3.9628012905674703,
"grad_norm": 0.2769727110862732,
"learning_rate": 9.20461677289201e-06,
"loss": 0.131,
"step": 5220
},
{
"epoch": 3.9703928639210475,
"grad_norm": 0.04668630287051201,
"learning_rate": 9.199829047494351e-06,
"loss": 0.001,
"step": 5230
},
{
"epoch": 3.977984437274625,
"grad_norm": 0.005737427622079849,
"learning_rate": 9.195028208658143e-06,
"loss": 0.1876,
"step": 5240
},
{
"epoch": 3.9855760106282028,
"grad_norm": 0.0012742755934596062,
"learning_rate": 9.190214271373399e-06,
"loss": 0.0296,
"step": 5250
},
{
"epoch": 3.99316758398178,
"grad_norm": 0.23183897137641907,
"learning_rate": 9.185387250671037e-06,
"loss": 0.0464,
"step": 5260
},
{
"epoch": 4.0,
"eval_f1": 0.9484240795008525,
"eval_loss": 0.1520499438047409,
"eval_precision": 0.9488593551067371,
"eval_recall": 0.9484066767830045,
"eval_runtime": 75.764,
"eval_samples_per_second": 17.396,
"eval_steps_per_second": 17.396,
"step": 5269
},
{
"epoch": 4.000759157335358,
"grad_norm": 0.015655217692255974,
"learning_rate": 9.18054716162282e-06,
"loss": 0.0778,
"step": 5270
},
{
"epoch": 4.008350730688935,
"grad_norm": 51.39549255371094,
"learning_rate": 9.175694019341321e-06,
"loss": 0.1821,
"step": 5280
},
{
"epoch": 4.015942304042513,
"grad_norm": 20.591053009033203,
"learning_rate": 9.170827838979864e-06,
"loss": 0.0411,
"step": 5290
},
{
"epoch": 4.0235338773960905,
"grad_norm": 0.00464022858068347,
"learning_rate": 9.165948635732487e-06,
"loss": 0.024,
"step": 5300
},
{
"epoch": 4.031125450749668,
"grad_norm": 0.9038947820663452,
"learning_rate": 9.161056424833888e-06,
"loss": 0.1133,
"step": 5310
},
{
"epoch": 4.038717024103246,
"grad_norm": 104.494384765625,
"learning_rate": 9.156151221559384e-06,
"loss": 0.0752,
"step": 5320
},
{
"epoch": 4.046308597456823,
"grad_norm": 0.003295379225164652,
"learning_rate": 9.151233041224851e-06,
"loss": 0.0697,
"step": 5330
},
{
"epoch": 4.0539001708104,
"grad_norm": 0.0672566145658493,
"learning_rate": 9.146301899186696e-06,
"loss": 0.0149,
"step": 5340
},
{
"epoch": 4.061491744163978,
"grad_norm": 0.020139316096901894,
"learning_rate": 9.141357810841785e-06,
"loss": 0.0004,
"step": 5350
},
{
"epoch": 4.069083317517555,
"grad_norm": 0.18405619263648987,
"learning_rate": 9.136400791627414e-06,
"loss": 0.0003,
"step": 5360
},
{
"epoch": 4.076674890871133,
"grad_norm": 0.011098051443696022,
"learning_rate": 9.131430857021252e-06,
"loss": 0.1502,
"step": 5370
},
{
"epoch": 4.084266464224711,
"grad_norm": 0.0007754967082291842,
"learning_rate": 9.126448022541296e-06,
"loss": 0.1435,
"step": 5380
},
{
"epoch": 4.091858037578288,
"grad_norm": 0.059689611196517944,
"learning_rate": 9.121452303745823e-06,
"loss": 0.2681,
"step": 5390
},
{
"epoch": 4.099449610931866,
"grad_norm": 23.187213897705078,
"learning_rate": 9.116443716233336e-06,
"loss": 0.0408,
"step": 5400
},
{
"epoch": 4.1070411842854435,
"grad_norm": 0.022440658882260323,
"learning_rate": 9.111422275642518e-06,
"loss": 0.0499,
"step": 5410
},
{
"epoch": 4.11463275763902,
"grad_norm": 0.04940136522054672,
"learning_rate": 9.10638799765219e-06,
"loss": 0.0007,
"step": 5420
},
{
"epoch": 4.122224330992598,
"grad_norm": 0.0109120924025774,
"learning_rate": 9.101340897981247e-06,
"loss": 0.0577,
"step": 5430
},
{
"epoch": 4.1298159043461755,
"grad_norm": 15.833015441894531,
"learning_rate": 9.096280992388629e-06,
"loss": 0.0016,
"step": 5440
},
{
"epoch": 4.137407477699753,
"grad_norm": 0.002290463075041771,
"learning_rate": 9.091208296673253e-06,
"loss": 0.0022,
"step": 5450
},
{
"epoch": 4.144999051053331,
"grad_norm": 0.006408984772861004,
"learning_rate": 9.086122826673976e-06,
"loss": 0.0004,
"step": 5460
},
{
"epoch": 4.152590624406908,
"grad_norm": 0.04329880699515343,
"learning_rate": 9.081024598269537e-06,
"loss": 0.0001,
"step": 5470
},
{
"epoch": 4.160182197760486,
"grad_norm": 0.0005604320904240012,
"learning_rate": 9.075913627378515e-06,
"loss": 0.1444,
"step": 5480
},
{
"epoch": 4.167773771114064,
"grad_norm": 0.0035607500467449427,
"learning_rate": 9.070789929959273e-06,
"loss": 0.0705,
"step": 5490
},
{
"epoch": 4.175365344467641,
"grad_norm": 21.509424209594727,
"learning_rate": 9.065653522009914e-06,
"loss": 0.0963,
"step": 5500
},
{
"epoch": 4.182956917821218,
"grad_norm": 0.040827080607414246,
"learning_rate": 9.060504419568226e-06,
"loss": 0.2367,
"step": 5510
},
{
"epoch": 4.190548491174796,
"grad_norm": 0.03268290311098099,
"learning_rate": 9.055342638711636e-06,
"loss": 0.1356,
"step": 5520
},
{
"epoch": 4.198140064528373,
"grad_norm": 0.02690727449953556,
"learning_rate": 9.050168195557152e-06,
"loss": 0.1927,
"step": 5530
},
{
"epoch": 4.205731637881951,
"grad_norm": 0.0010843976633623242,
"learning_rate": 9.044981106261327e-06,
"loss": 0.03,
"step": 5540
},
{
"epoch": 4.2133232112355286,
"grad_norm": 0.017938513308763504,
"learning_rate": 9.039781387020195e-06,
"loss": 0.0011,
"step": 5550
},
{
"epoch": 4.220914784589106,
"grad_norm": 0.11831680685281754,
"learning_rate": 9.034569054069222e-06,
"loss": 0.0028,
"step": 5560
},
{
"epoch": 4.228506357942684,
"grad_norm": 0.0017340222839266062,
"learning_rate": 9.029344123683269e-06,
"loss": 0.0004,
"step": 5570
},
{
"epoch": 4.236097931296261,
"grad_norm": 45.62750244140625,
"learning_rate": 9.024106612176519e-06,
"loss": 0.199,
"step": 5580
},
{
"epoch": 4.243689504649839,
"grad_norm": 0.00023749677347950637,
"learning_rate": 9.019382108477498e-06,
"loss": 0.0737,
"step": 5590
},
{
"epoch": 4.251281078003416,
"grad_norm": 0.0017125029116868973,
"learning_rate": 9.014120737927479e-06,
"loss": 0.0038,
"step": 5600
},
{
"epoch": 4.258872651356993,
"grad_norm": 0.005647186189889908,
"learning_rate": 9.008846833789777e-06,
"loss": 0.0524,
"step": 5610
},
{
"epoch": 4.266464224710571,
"grad_norm": 0.02812052331864834,
"learning_rate": 9.003560412531492e-06,
"loss": 0.0008,
"step": 5620
},
{
"epoch": 4.274055798064149,
"grad_norm": 0.004697522614151239,
"learning_rate": 8.99826149065881e-06,
"loss": 0.022,
"step": 5630
},
{
"epoch": 4.281647371417726,
"grad_norm": 0.000999168842099607,
"learning_rate": 8.992950084716952e-06,
"loss": 0.0255,
"step": 5640
},
{
"epoch": 4.289238944771304,
"grad_norm": 0.00024819112149998546,
"learning_rate": 8.987626211290112e-06,
"loss": 0.1814,
"step": 5650
},
{
"epoch": 4.296830518124882,
"grad_norm": 15.028079986572266,
"learning_rate": 8.982289887001419e-06,
"loss": 0.0483,
"step": 5660
},
{
"epoch": 4.304422091478459,
"grad_norm": 0.012629321776330471,
"learning_rate": 8.976941128512873e-06,
"loss": 0.0727,
"step": 5670
},
{
"epoch": 4.312013664832037,
"grad_norm": 0.02232271246612072,
"learning_rate": 8.9715799525253e-06,
"loss": 0.1076,
"step": 5680
},
{
"epoch": 4.319605238185614,
"grad_norm": 0.013221162371337414,
"learning_rate": 8.966206375778302e-06,
"loss": 0.1304,
"step": 5690
},
{
"epoch": 4.327196811539191,
"grad_norm": 20.240745544433594,
"learning_rate": 8.960820415050193e-06,
"loss": 0.0818,
"step": 5700
},
{
"epoch": 4.334788384892769,
"grad_norm": 0.9472859501838684,
"learning_rate": 8.955422087157962e-06,
"loss": 0.0875,
"step": 5710
},
{
"epoch": 4.3423799582463465,
"grad_norm": 0.24365593492984772,
"learning_rate": 8.950011408957206e-06,
"loss": 0.0052,
"step": 5720
},
{
"epoch": 4.349971531599924,
"grad_norm": 0.5765083432197571,
"learning_rate": 8.944588397342093e-06,
"loss": 0.3057,
"step": 5730
},
{
"epoch": 4.357563104953502,
"grad_norm": 36.48699951171875,
"learning_rate": 8.939153069245291e-06,
"loss": 0.1687,
"step": 5740
},
{
"epoch": 4.365154678307079,
"grad_norm": 0.011977112852036953,
"learning_rate": 8.933705441637931e-06,
"loss": 0.0129,
"step": 5750
},
{
"epoch": 4.372746251660657,
"grad_norm": 0.049162607640028,
"learning_rate": 8.928245531529546e-06,
"loss": 0.0747,
"step": 5760
},
{
"epoch": 4.380337825014234,
"grad_norm": 0.006424940191209316,
"learning_rate": 8.922773355968018e-06,
"loss": 0.0001,
"step": 5770
},
{
"epoch": 4.387929398367811,
"grad_norm": 0.0021049147471785545,
"learning_rate": 8.91728893203953e-06,
"loss": 0.0011,
"step": 5780
},
{
"epoch": 4.395520971721389,
"grad_norm": 0.005935146939009428,
"learning_rate": 8.911792276868502e-06,
"loss": 0.0685,
"step": 5790
},
{
"epoch": 4.403112545074967,
"grad_norm": 0.16192130744457245,
"learning_rate": 8.906283407617555e-06,
"loss": 0.0789,
"step": 5800
},
{
"epoch": 4.410704118428544,
"grad_norm": 0.0363471657037735,
"learning_rate": 8.900762341487439e-06,
"loss": 0.0003,
"step": 5810
},
{
"epoch": 4.418295691782122,
"grad_norm": 0.03035406582057476,
"learning_rate": 8.895229095716988e-06,
"loss": 0.0004,
"step": 5820
},
{
"epoch": 4.4258872651356995,
"grad_norm": 0.0051777479238808155,
"learning_rate": 8.889683687583067e-06,
"loss": 0.0974,
"step": 5830
},
{
"epoch": 4.433478838489277,
"grad_norm": 0.001428132993169129,
"learning_rate": 8.884126134400516e-06,
"loss": 0.0104,
"step": 5840
},
{
"epoch": 4.441070411842855,
"grad_norm": 0.029337646439671516,
"learning_rate": 8.8785564535221e-06,
"loss": 0.1961,
"step": 5850
},
{
"epoch": 4.448661985196432,
"grad_norm": 103.57210540771484,
"learning_rate": 8.872974662338443e-06,
"loss": 0.0941,
"step": 5860
},
{
"epoch": 4.456253558550009,
"grad_norm": 0.006421659607440233,
"learning_rate": 8.86738077827799e-06,
"loss": 0.0586,
"step": 5870
},
{
"epoch": 4.463845131903587,
"grad_norm": 0.21757641434669495,
"learning_rate": 8.861774818806939e-06,
"loss": 0.1107,
"step": 5880
},
{
"epoch": 4.471436705257164,
"grad_norm": 0.2700095474720001,
"learning_rate": 8.856156801429196e-06,
"loss": 0.1388,
"step": 5890
},
{
"epoch": 4.479028278610742,
"grad_norm": 0.0029901862144470215,
"learning_rate": 8.850526743686314e-06,
"loss": 0.1908,
"step": 5900
},
{
"epoch": 4.48661985196432,
"grad_norm": 0.008274559862911701,
"learning_rate": 8.844884663157441e-06,
"loss": 0.0842,
"step": 5910
},
{
"epoch": 4.494211425317897,
"grad_norm": 0.006725401151925325,
"learning_rate": 8.83923057745926e-06,
"loss": 0.0003,
"step": 5920
},
{
"epoch": 4.501802998671475,
"grad_norm": 13.423134803771973,
"learning_rate": 8.833564504245953e-06,
"loss": 0.0658,
"step": 5930
},
{
"epoch": 4.509394572025053,
"grad_norm": 0.047781139612197876,
"learning_rate": 8.827886461209114e-06,
"loss": 0.0008,
"step": 5940
},
{
"epoch": 4.516986145378629,
"grad_norm": 0.0009586279047653079,
"learning_rate": 8.82219646607772e-06,
"loss": 0.0003,
"step": 5950
},
{
"epoch": 4.524577718732207,
"grad_norm": 0.07489871978759766,
"learning_rate": 8.816494536618069e-06,
"loss": 0.0003,
"step": 5960
},
{
"epoch": 4.532169292085785,
"grad_norm": 0.015722280368208885,
"learning_rate": 8.810780690633715e-06,
"loss": 0.1269,
"step": 5970
},
{
"epoch": 4.539760865439362,
"grad_norm": 0.01760883256793022,
"learning_rate": 8.805054945965429e-06,
"loss": 0.0659,
"step": 5980
},
{
"epoch": 4.54735243879294,
"grad_norm": 0.03223474696278572,
"learning_rate": 8.799317320491125e-06,
"loss": 0.0005,
"step": 5990
},
{
"epoch": 4.5549440121465175,
"grad_norm": 0.0017072842456400394,
"learning_rate": 8.793567832125823e-06,
"loss": 0.1485,
"step": 6000
},
{
"epoch": 4.562535585500095,
"grad_norm": 0.0031113557051867247,
"learning_rate": 8.787806498821572e-06,
"loss": 0.0058,
"step": 6010
},
{
"epoch": 4.570127158853673,
"grad_norm": 0.016612514853477478,
"learning_rate": 8.782033338567414e-06,
"loss": 0.0294,
"step": 6020
},
{
"epoch": 4.57771873220725,
"grad_norm": 0.010033627972006798,
"learning_rate": 8.776248369389319e-06,
"loss": 0.064,
"step": 6030
},
{
"epoch": 4.585310305560828,
"grad_norm": 0.007523770444095135,
"learning_rate": 8.770451609350123e-06,
"loss": 0.1784,
"step": 6040
},
{
"epoch": 4.592901878914405,
"grad_norm": 0.0006488583167083561,
"learning_rate": 8.764643076549481e-06,
"loss": 0.0001,
"step": 6050
},
{
"epoch": 4.600493452267982,
"grad_norm": 49.0224494934082,
"learning_rate": 8.75882278912381e-06,
"loss": 0.1479,
"step": 6060
},
{
"epoch": 4.60808502562156,
"grad_norm": 0.05112855136394501,
"learning_rate": 8.752990765246222e-06,
"loss": 0.0742,
"step": 6070
},
{
"epoch": 4.615676598975138,
"grad_norm": 0.007768516894429922,
"learning_rate": 8.747147023126486e-06,
"loss": 0.0547,
"step": 6080
},
{
"epoch": 4.623268172328715,
"grad_norm": 0.03929920494556427,
"learning_rate": 8.741291581010945e-06,
"loss": 0.0005,
"step": 6090
},
{
"epoch": 4.630859745682293,
"grad_norm": 0.0333462730050087,
"learning_rate": 8.735424457182483e-06,
"loss": 0.0912,
"step": 6100
},
{
"epoch": 4.6384513190358705,
"grad_norm": 0.0021920499857515097,
"learning_rate": 8.729545669960459e-06,
"loss": 0.0025,
"step": 6110
},
{
"epoch": 4.646042892389447,
"grad_norm": 0.24167831242084503,
"learning_rate": 8.723655237700646e-06,
"loss": 0.0184,
"step": 6120
},
{
"epoch": 4.653634465743025,
"grad_norm": 0.01909787394106388,
"learning_rate": 8.71775317879518e-06,
"loss": 0.0001,
"step": 6130
},
{
"epoch": 4.6612260390966025,
"grad_norm": 97.6840591430664,
"learning_rate": 8.711839511672497e-06,
"loss": 0.0578,
"step": 6140
},
{
"epoch": 4.66881761245018,
"grad_norm": 0.000244935043156147,
"learning_rate": 8.705914254797283e-06,
"loss": 0.1423,
"step": 6150
},
{
"epoch": 4.676409185803758,
"grad_norm": 0.0006741081597283483,
"learning_rate": 8.699977426670403e-06,
"loss": 0.0306,
"step": 6160
},
{
"epoch": 4.684000759157335,
"grad_norm": 0.001535810879431665,
"learning_rate": 8.69402904582886e-06,
"loss": 0.0496,
"step": 6170
},
{
"epoch": 4.691592332510913,
"grad_norm": 0.4821704030036926,
"learning_rate": 8.688069130845725e-06,
"loss": 0.0443,
"step": 6180
},
{
"epoch": 4.699183905864491,
"grad_norm": 0.002279536332935095,
"learning_rate": 8.682097700330086e-06,
"loss": 0.0222,
"step": 6190
},
{
"epoch": 4.706775479218068,
"grad_norm": 0.009520245715975761,
"learning_rate": 8.67611477292698e-06,
"loss": 0.1731,
"step": 6200
},
{
"epoch": 4.714367052571646,
"grad_norm": 0.1851215660572052,
"learning_rate": 8.67012036731735e-06,
"loss": 0.0629,
"step": 6210
},
{
"epoch": 4.721958625925223,
"grad_norm": 0.12576204538345337,
"learning_rate": 8.664114502217975e-06,
"loss": 0.0448,
"step": 6220
},
{
"epoch": 4.7295501992788,
"grad_norm": 0.015547769144177437,
"learning_rate": 8.65809719638141e-06,
"loss": 0.0147,
"step": 6230
},
{
"epoch": 4.737141772632378,
"grad_norm": 0.2670181095600128,
"learning_rate": 8.65206846859594e-06,
"loss": 0.0005,
"step": 6240
},
{
"epoch": 4.7447333459859555,
"grad_norm": 0.028395511209964752,
"learning_rate": 8.646028337685509e-06,
"loss": 0.05,
"step": 6250
},
{
"epoch": 4.752324919339533,
"grad_norm": 0.018742332234978676,
"learning_rate": 8.639976822509666e-06,
"loss": 0.2398,
"step": 6260
},
{
"epoch": 4.759916492693111,
"grad_norm": 12.270938873291016,
"learning_rate": 8.633913941963507e-06,
"loss": 0.313,
"step": 6270
},
{
"epoch": 4.767508066046688,
"grad_norm": 0.07293716818094254,
"learning_rate": 8.627839714977618e-06,
"loss": 0.0008,
"step": 6280
},
{
"epoch": 4.775099639400266,
"grad_norm": 0.06347032636404037,
"learning_rate": 8.621754160518005e-06,
"loss": 0.0221,
"step": 6290
},
{
"epoch": 4.782691212753843,
"grad_norm": 0.0011452403850853443,
"learning_rate": 8.615657297586051e-06,
"loss": 0.1013,
"step": 6300
},
{
"epoch": 4.79028278610742,
"grad_norm": 0.0021203244104981422,
"learning_rate": 8.609549145218442e-06,
"loss": 0.0007,
"step": 6310
},
{
"epoch": 4.797874359460998,
"grad_norm": 0.006574318744242191,
"learning_rate": 8.603429722487117e-06,
"loss": 0.0725,
"step": 6320
},
{
"epoch": 4.805465932814576,
"grad_norm": 0.00014791313151363283,
"learning_rate": 8.597299048499206e-06,
"loss": 0.0532,
"step": 6330
},
{
"epoch": 4.813057506168153,
"grad_norm": 0.12207093834877014,
"learning_rate": 8.591157142396966e-06,
"loss": 0.1137,
"step": 6340
},
{
"epoch": 4.820649079521731,
"grad_norm": 0.027442127466201782,
"learning_rate": 8.58500402335773e-06,
"loss": 0.0812,
"step": 6350
},
{
"epoch": 4.828240652875309,
"grad_norm": 0.00018395182269159704,
"learning_rate": 8.578839710593836e-06,
"loss": 0.1686,
"step": 6360
},
{
"epoch": 4.835832226228886,
"grad_norm": 0.06821048259735107,
"learning_rate": 8.57266422335258e-06,
"loss": 0.0005,
"step": 6370
},
{
"epoch": 4.843423799582464,
"grad_norm": 9.863347804639488e-05,
"learning_rate": 8.56647758091614e-06,
"loss": 0.0005,
"step": 6380
},
{
"epoch": 4.8510153729360415,
"grad_norm": 0.0016949453856796026,
"learning_rate": 8.560279802601533e-06,
"loss": 0.1504,
"step": 6390
},
{
"epoch": 4.858606946289618,
"grad_norm": 0.0009430780191905797,
"learning_rate": 8.554070907760544e-06,
"loss": 0.0,
"step": 6400
},
{
"epoch": 4.866198519643196,
"grad_norm": 0.02552955597639084,
"learning_rate": 8.547850915779662e-06,
"loss": 0.0001,
"step": 6410
},
{
"epoch": 4.8737900929967735,
"grad_norm": 0.014719455502927303,
"learning_rate": 8.541619846080039e-06,
"loss": 0.15,
"step": 6420
},
{
"epoch": 4.881381666350351,
"grad_norm": 0.09882048517465591,
"learning_rate": 8.535377718117399e-06,
"loss": 0.0569,
"step": 6430
},
{
"epoch": 4.888973239703929,
"grad_norm": 0.22454605996608734,
"learning_rate": 8.52912455138201e-06,
"loss": 0.1482,
"step": 6440
},
{
"epoch": 4.896564813057506,
"grad_norm": 0.08625132590532303,
"learning_rate": 8.52286036539859e-06,
"loss": 0.0011,
"step": 6450
},
{
"epoch": 4.904156386411084,
"grad_norm": 0.03739362582564354,
"learning_rate": 8.51658517972628e-06,
"loss": 0.1778,
"step": 6460
},
{
"epoch": 4.911747959764662,
"grad_norm": 0.21021807193756104,
"learning_rate": 8.510299013958559e-06,
"loss": 0.0011,
"step": 6470
},
{
"epoch": 4.919339533118238,
"grad_norm": 0.04205634444952011,
"learning_rate": 8.504001887723185e-06,
"loss": 0.0787,
"step": 6480
},
{
"epoch": 4.926931106471816,
"grad_norm": 0.09222347289323807,
"learning_rate": 8.497693820682146e-06,
"loss": 0.0006,
"step": 6490
},
{
"epoch": 4.934522679825394,
"grad_norm": 0.1209307536482811,
"learning_rate": 8.491374832531591e-06,
"loss": 0.053,
"step": 6500
},
{
"epoch": 4.942114253178971,
"grad_norm": 0.009995940141379833,
"learning_rate": 8.485044943001763e-06,
"loss": 0.0096,
"step": 6510
},
{
"epoch": 4.949705826532549,
"grad_norm": 0.018289346247911453,
"learning_rate": 8.47870417185695e-06,
"loss": 0.0012,
"step": 6520
},
{
"epoch": 4.9572973998861265,
"grad_norm": 65.71520233154297,
"learning_rate": 8.472352538895411e-06,
"loss": 0.1783,
"step": 6530
},
{
"epoch": 4.964888973239704,
"grad_norm": 57.22151184082031,
"learning_rate": 8.465990063949323e-06,
"loss": 0.1034,
"step": 6540
},
{
"epoch": 4.972480546593282,
"grad_norm": 0.003517146920785308,
"learning_rate": 8.459616766884713e-06,
"loss": 0.0024,
"step": 6550
},
{
"epoch": 4.980072119946859,
"grad_norm": 0.0020259765442460775,
"learning_rate": 8.453232667601403e-06,
"loss": 0.0001,
"step": 6560
},
{
"epoch": 4.987663693300436,
"grad_norm": 0.007150826510041952,
"learning_rate": 8.44683778603294e-06,
"loss": 0.1704,
"step": 6570
},
{
"epoch": 4.995255266654014,
"grad_norm": 0.0018830208573490381,
"learning_rate": 8.440432142146535e-06,
"loss": 0.0349,
"step": 6580
},
{
"epoch": 4.99981021066616,
"eval_f1": 0.9552939310725507,
"eval_loss": 0.15884605050086975,
"eval_precision": 0.9567644368540595,
"eval_recall": 0.9552352048558422,
"eval_runtime": 75.7698,
"eval_samples_per_second": 17.395,
"eval_steps_per_second": 17.395,
"step": 6586
},
{
"epoch": 5.002846840007591,
"grad_norm": 0.002661398844793439,
"learning_rate": 8.434015755943013e-06,
"loss": 0.0002,
"step": 6590
},
{
"epoch": 5.010438413361169,
"grad_norm": 0.015321805141866207,
"learning_rate": 8.427588647456727e-06,
"loss": 0.0006,
"step": 6600
},
{
"epoch": 5.018029986714747,
"grad_norm": 0.015539165586233139,
"learning_rate": 8.42115083675552e-06,
"loss": 0.128,
"step": 6610
},
{
"epoch": 5.025621560068324,
"grad_norm": 5.927582263946533,
"learning_rate": 8.414702343940647e-06,
"loss": 0.0743,
"step": 6620
},
{
"epoch": 5.033213133421902,
"grad_norm": 0.0004428077954798937,
"learning_rate": 8.408243189146714e-06,
"loss": 0.0764,
"step": 6630
},
{
"epoch": 5.0408047067754795,
"grad_norm": 13.519503593444824,
"learning_rate": 8.401773392541621e-06,
"loss": 0.0837,
"step": 6640
},
{
"epoch": 5.048396280129057,
"grad_norm": 0.0011204121401533484,
"learning_rate": 8.395292974326497e-06,
"loss": 0.0001,
"step": 6650
},
{
"epoch": 5.055987853482634,
"grad_norm": 0.005702109541743994,
"learning_rate": 8.388801954735632e-06,
"loss": 0.0003,
"step": 6660
},
{
"epoch": 5.0635794268362115,
"grad_norm": 0.009877257980406284,
"learning_rate": 8.38230035403642e-06,
"loss": 0.0001,
"step": 6670
},
{
"epoch": 5.071171000189789,
"grad_norm": 0.0006185189704410732,
"learning_rate": 8.375788192529292e-06,
"loss": 0.0002,
"step": 6680
},
{
"epoch": 5.078762573543367,
"grad_norm": 0.0004436051531229168,
"learning_rate": 8.369265490547653e-06,
"loss": 0.0004,
"step": 6690
},
{
"epoch": 5.086354146896944,
"grad_norm": 0.016778159886598587,
"learning_rate": 8.362732268457824e-06,
"loss": 0.1505,
"step": 6700
},
{
"epoch": 5.093945720250522,
"grad_norm": 0.13505133986473083,
"learning_rate": 8.356188546658966e-06,
"loss": 0.0825,
"step": 6710
},
{
"epoch": 5.1015372936041,
"grad_norm": 0.015829697251319885,
"learning_rate": 8.34963434558303e-06,
"loss": 0.106,
"step": 6720
},
{
"epoch": 5.109128866957677,
"grad_norm": 0.006577119696885347,
"learning_rate": 8.343069685694687e-06,
"loss": 0.1537,
"step": 6730
},
{
"epoch": 5.116720440311255,
"grad_norm": 0.0571792870759964,
"learning_rate": 8.33649458749126e-06,
"loss": 0.023,
"step": 6740
},
{
"epoch": 5.124312013664832,
"grad_norm": 0.13444474339485168,
"learning_rate": 8.329909071502668e-06,
"loss": 0.1881,
"step": 6750
},
{
"epoch": 5.131903587018409,
"grad_norm": 0.011354477144777775,
"learning_rate": 8.32331315829136e-06,
"loss": 0.2186,
"step": 6760
},
{
"epoch": 5.139495160371987,
"grad_norm": 0.11647947877645493,
"learning_rate": 8.31670686845224e-06,
"loss": 0.0005,
"step": 6770
},
{
"epoch": 5.147086733725565,
"grad_norm": 0.03318728879094124,
"learning_rate": 8.310090222612623e-06,
"loss": 0.0004,
"step": 6780
},
{
"epoch": 5.154678307079142,
"grad_norm": 0.0020830295979976654,
"learning_rate": 8.303463241432156e-06,
"loss": 0.0738,
"step": 6790
},
{
"epoch": 5.16226988043272,
"grad_norm": 0.18546123802661896,
"learning_rate": 8.296825945602749e-06,
"loss": 0.225,
"step": 6800
},
{
"epoch": 5.1698614537862975,
"grad_norm": 0.013226731680333614,
"learning_rate": 8.290178355848528e-06,
"loss": 0.0024,
"step": 6810
},
{
"epoch": 5.177453027139875,
"grad_norm": 0.0015887143090367317,
"learning_rate": 8.283520492925758e-06,
"loss": 0.1161,
"step": 6820
},
{
"epoch": 5.185044600493452,
"grad_norm": 12.341133117675781,
"learning_rate": 8.276852377622777e-06,
"loss": 0.0333,
"step": 6830
},
{
"epoch": 5.1926361738470295,
"grad_norm": 0.48488712310791016,
"learning_rate": 8.270174030759939e-06,
"loss": 0.0025,
"step": 6840
},
{
"epoch": 5.200227747200607,
"grad_norm": 0.09974020719528198,
"learning_rate": 8.263485473189542e-06,
"loss": 0.0003,
"step": 6850
},
{
"epoch": 5.207819320554185,
"grad_norm": 0.005017921794205904,
"learning_rate": 8.256786725795767e-06,
"loss": 0.0707,
"step": 6860
},
{
"epoch": 5.215410893907762,
"grad_norm": 16.735441207885742,
"learning_rate": 8.250077809494612e-06,
"loss": 0.1761,
"step": 6870
},
{
"epoch": 5.22300246726134,
"grad_norm": 0.08619498461484909,
"learning_rate": 8.243358745233822e-06,
"loss": 0.0025,
"step": 6880
},
{
"epoch": 5.230594040614918,
"grad_norm": 0.008258694782853127,
"learning_rate": 8.236629553992837e-06,
"loss": 0.1096,
"step": 6890
},
{
"epoch": 5.238185613968495,
"grad_norm": 0.032047972083091736,
"learning_rate": 8.229890256782705e-06,
"loss": 0.0774,
"step": 6900
},
{
"epoch": 5.245777187322073,
"grad_norm": 0.12164535373449326,
"learning_rate": 8.223140874646039e-06,
"loss": 0.041,
"step": 6910
},
{
"epoch": 5.25336876067565,
"grad_norm": 0.30879223346710205,
"learning_rate": 8.216381428656935e-06,
"loss": 0.0008,
"step": 6920
},
{
"epoch": 5.260960334029227,
"grad_norm": 0.011329672299325466,
"learning_rate": 8.209611939920912e-06,
"loss": 0.0507,
"step": 6930
},
{
"epoch": 5.268551907382805,
"grad_norm": 0.0024318841751664877,
"learning_rate": 8.202832429574851e-06,
"loss": 0.0511,
"step": 6940
},
{
"epoch": 5.2761434807363825,
"grad_norm": 0.06363888084888458,
"learning_rate": 8.196042918786923e-06,
"loss": 0.0418,
"step": 6950
},
{
"epoch": 5.28373505408996,
"grad_norm": 0.006296386010944843,
"learning_rate": 8.189243428756518e-06,
"loss": 0.0013,
"step": 6960
},
{
"epoch": 5.291326627443538,
"grad_norm": 1.5055712461471558,
"learning_rate": 8.182433980714191e-06,
"loss": 0.0003,
"step": 6970
},
{
"epoch": 5.298918200797115,
"grad_norm": 0.04809055104851723,
"learning_rate": 8.175614595921589e-06,
"loss": 0.0001,
"step": 6980
},
{
"epoch": 5.306509774150693,
"grad_norm": 0.0006017005071043968,
"learning_rate": 8.168785295671385e-06,
"loss": 0.0001,
"step": 6990
},
{
"epoch": 5.314101347504271,
"grad_norm": 0.05823567882180214,
"learning_rate": 8.161946101287205e-06,
"loss": 0.1,
"step": 7000
},
{
"epoch": 5.321692920857847,
"grad_norm": 0.21126702427864075,
"learning_rate": 8.155097034123582e-06,
"loss": 0.0012,
"step": 7010
},
{
"epoch": 5.329284494211425,
"grad_norm": 0.005064593628048897,
"learning_rate": 8.148238115565865e-06,
"loss": 0.2162,
"step": 7020
},
{
"epoch": 5.336876067565003,
"grad_norm": 0.03429802507162094,
"learning_rate": 8.141369367030165e-06,
"loss": 0.0068,
"step": 7030
},
{
"epoch": 5.34446764091858,
"grad_norm": 0.019597377628087997,
"learning_rate": 8.134490809963285e-06,
"loss": 0.0447,
"step": 7040
},
{
"epoch": 5.352059214272158,
"grad_norm": 3.237245559692383,
"learning_rate": 8.127602465842656e-06,
"loss": 0.0408,
"step": 7050
},
{
"epoch": 5.3596507876257355,
"grad_norm": 0.1109641045331955,
"learning_rate": 8.12070435617627e-06,
"loss": 0.0041,
"step": 7060
},
{
"epoch": 5.367242360979313,
"grad_norm": 1.6172115802764893,
"learning_rate": 8.113796502502605e-06,
"loss": 0.0008,
"step": 7070
},
{
"epoch": 5.374833934332891,
"grad_norm": 0.0019253261853009462,
"learning_rate": 8.106878926390565e-06,
"loss": 0.0106,
"step": 7080
},
{
"epoch": 5.382425507686468,
"grad_norm": 0.010185305029153824,
"learning_rate": 8.099951649439415e-06,
"loss": 0.17,
"step": 7090
},
{
"epoch": 5.390017081040045,
"grad_norm": 0.00028460906469263136,
"learning_rate": 8.093014693278705e-06,
"loss": 0.0814,
"step": 7100
},
{
"epoch": 5.397608654393623,
"grad_norm": 0.09348779916763306,
"learning_rate": 8.08606807956821e-06,
"loss": 0.0562,
"step": 7110
},
{
"epoch": 5.4052002277472,
"grad_norm": 0.01985323429107666,
"learning_rate": 8.079111829997861e-06,
"loss": 0.0004,
"step": 7120
},
{
"epoch": 5.412791801100778,
"grad_norm": 0.084492027759552,
"learning_rate": 8.072145966287668e-06,
"loss": 0.0393,
"step": 7130
},
{
"epoch": 5.420383374454356,
"grad_norm": 0.008949169889092445,
"learning_rate": 8.06517051018767e-06,
"loss": 0.0027,
"step": 7140
},
{
"epoch": 5.427974947807933,
"grad_norm": 0.010001681745052338,
"learning_rate": 8.058185483477849e-06,
"loss": 0.0002,
"step": 7150
},
{
"epoch": 5.435566521161511,
"grad_norm": 0.00013484137889463454,
"learning_rate": 8.051190907968077e-06,
"loss": 0.0617,
"step": 7160
},
{
"epoch": 5.443158094515089,
"grad_norm": 0.028125835582613945,
"learning_rate": 8.044186805498033e-06,
"loss": 0.0003,
"step": 7170
},
{
"epoch": 5.450749667868665,
"grad_norm": 0.011845303699374199,
"learning_rate": 8.037173197937149e-06,
"loss": 0.0002,
"step": 7180
},
{
"epoch": 5.458341241222243,
"grad_norm": 0.021918371319770813,
"learning_rate": 8.030150107184535e-06,
"loss": 0.0003,
"step": 7190
},
{
"epoch": 5.465932814575821,
"grad_norm": 0.002744874684140086,
"learning_rate": 8.023117555168907e-06,
"loss": 0.0174,
"step": 7200
},
{
"epoch": 5.473524387929398,
"grad_norm": 0.0008592222584411502,
"learning_rate": 8.016075563848524e-06,
"loss": 0.0001,
"step": 7210
},
{
"epoch": 5.481115961282976,
"grad_norm": 0.0009818489197641611,
"learning_rate": 8.009024155211125e-06,
"loss": 0.0001,
"step": 7220
},
{
"epoch": 5.4887075346365535,
"grad_norm": 0.0036790217272937298,
"learning_rate": 8.001963351273843e-06,
"loss": 0.0001,
"step": 7230
},
{
"epoch": 5.496299107990131,
"grad_norm": 0.009668831713497639,
"learning_rate": 7.994893174083151e-06,
"loss": 0.0663,
"step": 7240
},
{
"epoch": 5.503890681343709,
"grad_norm": 0.008087705820798874,
"learning_rate": 7.98781364571479e-06,
"loss": 0.0,
"step": 7250
},
{
"epoch": 5.511482254697286,
"grad_norm": 0.001750052673742175,
"learning_rate": 7.980724788273698e-06,
"loss": 0.0001,
"step": 7260
},
{
"epoch": 5.519073828050864,
"grad_norm": 0.0040147858671844006,
"learning_rate": 7.973626623893942e-06,
"loss": 0.1629,
"step": 7270
},
{
"epoch": 5.526665401404441,
"grad_norm": 0.005076427478343248,
"learning_rate": 7.96651917473865e-06,
"loss": 0.0001,
"step": 7280
},
{
"epoch": 5.534256974758018,
"grad_norm": 0.022049933671951294,
"learning_rate": 7.959402462999934e-06,
"loss": 0.0001,
"step": 7290
},
{
"epoch": 5.541848548111596,
"grad_norm": 7.288018226623535,
"learning_rate": 7.952276510898838e-06,
"loss": 0.0612,
"step": 7300
},
{
"epoch": 5.549440121465174,
"grad_norm": 0.48564571142196655,
"learning_rate": 7.945141340685249e-06,
"loss": 0.0001,
"step": 7310
},
{
"epoch": 5.557031694818751,
"grad_norm": 0.0020839564967900515,
"learning_rate": 7.937996974637839e-06,
"loss": 0.0002,
"step": 7320
},
{
"epoch": 5.564623268172329,
"grad_norm": 0.0012567265657708049,
"learning_rate": 7.930843435063996e-06,
"loss": 0.0003,
"step": 7330
},
{
"epoch": 5.5722148415259065,
"grad_norm": 0.0036961582954972982,
"learning_rate": 7.923680744299747e-06,
"loss": 0.0876,
"step": 7340
},
{
"epoch": 5.579806414879484,
"grad_norm": 31.300655364990234,
"learning_rate": 7.916508924709693e-06,
"loss": 0.151,
"step": 7350
},
{
"epoch": 5.587397988233061,
"grad_norm": 0.008196866139769554,
"learning_rate": 7.909327998686942e-06,
"loss": 0.0001,
"step": 7360
},
{
"epoch": 5.5949895615866385,
"grad_norm": 10.782143592834473,
"learning_rate": 7.902137988653032e-06,
"loss": 0.0539,
"step": 7370
},
{
"epoch": 5.602581134940216,
"grad_norm": 0.004750726278871298,
"learning_rate": 7.894938917057866e-06,
"loss": 0.0385,
"step": 7380
},
{
"epoch": 5.610172708293794,
"grad_norm": 0.04581161588430405,
"learning_rate": 7.887730806379641e-06,
"loss": 0.2684,
"step": 7390
},
{
"epoch": 5.617764281647371,
"grad_norm": 0.026009181514382362,
"learning_rate": 7.880513679124777e-06,
"loss": 0.1283,
"step": 7400
},
{
"epoch": 5.625355855000949,
"grad_norm": 2.0138673782348633,
"learning_rate": 7.873287557827846e-06,
"loss": 0.0004,
"step": 7410
},
{
"epoch": 5.632947428354527,
"grad_norm": 0.14630401134490967,
"learning_rate": 7.866052465051506e-06,
"loss": 0.0503,
"step": 7420
},
{
"epoch": 5.640539001708104,
"grad_norm": 0.0008778591873124242,
"learning_rate": 7.858808423386422e-06,
"loss": 0.0032,
"step": 7430
},
{
"epoch": 5.648130575061682,
"grad_norm": 0.004400940611958504,
"learning_rate": 7.851555455451208e-06,
"loss": 0.0002,
"step": 7440
},
{
"epoch": 5.6557221484152596,
"grad_norm": 0.002588229486718774,
"learning_rate": 7.844293583892341e-06,
"loss": 0.0055,
"step": 7450
},
{
"epoch": 5.663313721768836,
"grad_norm": 0.0016362261958420277,
"learning_rate": 7.837022831384107e-06,
"loss": 0.0001,
"step": 7460
},
{
"epoch": 5.670905295122414,
"grad_norm": 0.006628331728279591,
"learning_rate": 7.829743220628515e-06,
"loss": 0.0001,
"step": 7470
},
{
"epoch": 5.6784968684759916,
"grad_norm": 0.0015720854280516505,
"learning_rate": 7.822454774355233e-06,
"loss": 0.1205,
"step": 7480
},
{
"epoch": 5.686088441829569,
"grad_norm": 0.005687546916306019,
"learning_rate": 7.815157515321521e-06,
"loss": 0.1584,
"step": 7490
},
{
"epoch": 5.693680015183147,
"grad_norm": 0.0018359271343797445,
"learning_rate": 7.807851466312152e-06,
"loss": 0.0833,
"step": 7500
},
{
"epoch": 5.701271588536724,
"grad_norm": 0.004786277189850807,
"learning_rate": 7.80053665013935e-06,
"loss": 0.0004,
"step": 7510
},
{
"epoch": 5.708863161890302,
"grad_norm": 0.14934459328651428,
"learning_rate": 7.793213089642705e-06,
"loss": 0.0678,
"step": 7520
},
{
"epoch": 5.716454735243879,
"grad_norm": 0.002186194993555546,
"learning_rate": 7.785880807689119e-06,
"loss": 0.014,
"step": 7530
},
{
"epoch": 5.7240463085974564,
"grad_norm": 0.007107855286449194,
"learning_rate": 7.778539827172717e-06,
"loss": 0.0021,
"step": 7540
},
{
"epoch": 5.731637881951034,
"grad_norm": 0.00156366394367069,
"learning_rate": 7.771190171014789e-06,
"loss": 0.0299,
"step": 7550
},
{
"epoch": 5.739229455304612,
"grad_norm": 0.006057819351553917,
"learning_rate": 7.763831862163715e-06,
"loss": 0.3021,
"step": 7560
},
{
"epoch": 5.746821028658189,
"grad_norm": 0.1267128884792328,
"learning_rate": 7.756464923594889e-06,
"loss": 0.1477,
"step": 7570
},
{
"epoch": 5.754412602011767,
"grad_norm": 0.003787196008488536,
"learning_rate": 7.74908937831065e-06,
"loss": 0.0012,
"step": 7580
},
{
"epoch": 5.762004175365345,
"grad_norm": 0.004670240916311741,
"learning_rate": 7.741705249340212e-06,
"loss": 0.0001,
"step": 7590
},
{
"epoch": 5.769595748718922,
"grad_norm": 0.0031925721559673548,
"learning_rate": 7.734312559739591e-06,
"loss": 0.1256,
"step": 7600
},
{
"epoch": 5.7771873220725,
"grad_norm": 0.05346198379993439,
"learning_rate": 7.726911332591533e-06,
"loss": 0.0297,
"step": 7610
},
{
"epoch": 5.7847788954260775,
"grad_norm": 9.102517127990723,
"learning_rate": 7.719501591005435e-06,
"loss": 0.0291,
"step": 7620
},
{
"epoch": 5.792370468779654,
"grad_norm": 0.012199531309306622,
"learning_rate": 7.71208335811729e-06,
"loss": 0.0015,
"step": 7630
},
{
"epoch": 5.799962042133232,
"grad_norm": 0.0010750379879027605,
"learning_rate": 7.704656657089594e-06,
"loss": 0.0002,
"step": 7640
},
{
"epoch": 5.8075536154868095,
"grad_norm": 0.0029223288875073195,
"learning_rate": 7.697221511111289e-06,
"loss": 0.0404,
"step": 7650
},
{
"epoch": 5.815145188840387,
"grad_norm": 0.030176958069205284,
"learning_rate": 7.689777943397684e-06,
"loss": 0.0002,
"step": 7660
},
{
"epoch": 5.822736762193965,
"grad_norm": 0.01166499499231577,
"learning_rate": 7.682325977190386e-06,
"loss": 0.0381,
"step": 7670
},
{
"epoch": 5.830328335547542,
"grad_norm": 32.26509475708008,
"learning_rate": 7.674865635757219e-06,
"loss": 0.0993,
"step": 7680
},
{
"epoch": 5.83791990890112,
"grad_norm": 19.091943740844727,
"learning_rate": 7.667396942392165e-06,
"loss": 0.0492,
"step": 7690
},
{
"epoch": 5.845511482254698,
"grad_norm": 0.01752518303692341,
"learning_rate": 7.659919920415282e-06,
"loss": 0.0053,
"step": 7700
},
{
"epoch": 5.853103055608274,
"grad_norm": 0.0013000709004700184,
"learning_rate": 7.652434593172629e-06,
"loss": 0.2842,
"step": 7710
},
{
"epoch": 5.860694628961852,
"grad_norm": 76.4178695678711,
"learning_rate": 7.6449409840362e-06,
"loss": 0.019,
"step": 7720
},
{
"epoch": 5.86828620231543,
"grad_norm": 49.07400894165039,
"learning_rate": 7.63743911640385e-06,
"loss": 0.0412,
"step": 7730
},
{
"epoch": 5.875877775669007,
"grad_norm": 0.018517136573791504,
"learning_rate": 7.629929013699215e-06,
"loss": 0.0113,
"step": 7740
},
{
"epoch": 5.883469349022585,
"grad_norm": 0.0009308361331932247,
"learning_rate": 7.622410699371651e-06,
"loss": 0.0975,
"step": 7750
},
{
"epoch": 5.8910609223761625,
"grad_norm": 0.002873294521123171,
"learning_rate": 7.614884196896146e-06,
"loss": 0.0001,
"step": 7760
},
{
"epoch": 5.89865249572974,
"grad_norm": 0.5766377449035645,
"learning_rate": 7.607349529773263e-06,
"loss": 0.0894,
"step": 7770
},
{
"epoch": 5.906244069083318,
"grad_norm": 0.33659154176712036,
"learning_rate": 7.599806721529048e-06,
"loss": 0.026,
"step": 7780
},
{
"epoch": 5.913835642436895,
"grad_norm": 0.06800296902656555,
"learning_rate": 7.592255795714978e-06,
"loss": 0.001,
"step": 7790
},
{
"epoch": 5.921427215790473,
"grad_norm": 0.010890863835811615,
"learning_rate": 7.5846967759078646e-06,
"loss": 0.0515,
"step": 7800
},
{
"epoch": 5.92901878914405,
"grad_norm": 0.0007496042526327074,
"learning_rate": 7.577129685709802e-06,
"loss": 0.0196,
"step": 7810
},
{
"epoch": 5.936610362497627,
"grad_norm": 0.12547799944877625,
"learning_rate": 7.569554548748076e-06,
"loss": 0.0212,
"step": 7820
},
{
"epoch": 5.944201935851205,
"grad_norm": 0.000410243752412498,
"learning_rate": 7.561971388675101e-06,
"loss": 0.0001,
"step": 7830
},
{
"epoch": 5.951793509204783,
"grad_norm": 0.0626864954829216,
"learning_rate": 7.554380229168341e-06,
"loss": 0.1047,
"step": 7840
},
{
"epoch": 5.95938508255836,
"grad_norm": 0.0048113660886883736,
"learning_rate": 7.546781093930238e-06,
"loss": 0.0166,
"step": 7850
},
{
"epoch": 5.966976655911938,
"grad_norm": 0.04934828728437424,
"learning_rate": 7.539174006688137e-06,
"loss": 0.1765,
"step": 7860
},
{
"epoch": 5.974568229265516,
"grad_norm": 3.118401527404785,
"learning_rate": 7.531558991194214e-06,
"loss": 0.0369,
"step": 7870
},
{
"epoch": 5.982159802619093,
"grad_norm": 33.45072937011719,
"learning_rate": 7.523936071225395e-06,
"loss": 0.1186,
"step": 7880
},
{
"epoch": 5.98975137597267,
"grad_norm": 0.09529292583465576,
"learning_rate": 7.516305270583291e-06,
"loss": 0.0382,
"step": 7890
},
{
"epoch": 5.997342949326248,
"grad_norm": 0.09993643313646317,
"learning_rate": 7.50866661309412e-06,
"loss": 0.1966,
"step": 7900
},
{
"epoch": 5.999620421332321,
"eval_f1": 0.9453778934602862,
"eval_loss": 0.17724575102329254,
"eval_precision": 0.9455308702748206,
"eval_recall": 0.9453717754172989,
"eval_runtime": 75.7194,
"eval_samples_per_second": 17.406,
"eval_steps_per_second": 17.406,
"step": 7903
},
{
"epoch": 6.004934522679825,
"grad_norm": 0.5747145414352417,
"learning_rate": 7.5010201226086285e-06,
"loss": 0.0792,
"step": 7910
},
{
"epoch": 6.012526096033403,
"grad_norm": 15.592010498046875,
"learning_rate": 7.493365823002023e-06,
"loss": 0.066,
"step": 7920
},
{
"epoch": 6.0201176693869805,
"grad_norm": 0.002133031841367483,
"learning_rate": 7.4857037381738924e-06,
"loss": 0.0001,
"step": 7930
},
{
"epoch": 6.027709242740558,
"grad_norm": 0.006577716208994389,
"learning_rate": 7.478033892048134e-06,
"loss": 0.0005,
"step": 7940
},
{
"epoch": 6.035300816094136,
"grad_norm": 0.0061035482212901115,
"learning_rate": 7.470356308572879e-06,
"loss": 0.0,
"step": 7950
},
{
"epoch": 6.042892389447713,
"grad_norm": 0.0037885792553424835,
"learning_rate": 7.462671011720417e-06,
"loss": 0.0001,
"step": 7960
},
{
"epoch": 6.050483962801291,
"grad_norm": 0.010262789204716682,
"learning_rate": 7.454978025487121e-06,
"loss": 0.0007,
"step": 7970
},
{
"epoch": 6.058075536154868,
"grad_norm": 0.0021226617973297834,
"learning_rate": 7.447277373893373e-06,
"loss": 0.0386,
"step": 7980
},
{
"epoch": 6.065667109508445,
"grad_norm": 0.00850209966301918,
"learning_rate": 7.439569080983493e-06,
"loss": 0.0008,
"step": 7990
},
{
"epoch": 6.073258682862023,
"grad_norm": 0.004618831444531679,
"learning_rate": 7.431853170825658e-06,
"loss": 0.0,
"step": 8000
},
{
"epoch": 6.080850256215601,
"grad_norm": 0.0010309051722288132,
"learning_rate": 7.424129667511824e-06,
"loss": 0.0174,
"step": 8010
},
{
"epoch": 6.088441829569178,
"grad_norm": 0.005731165409088135,
"learning_rate": 7.4163985951576616e-06,
"loss": 0.0099,
"step": 8020
},
{
"epoch": 6.096033402922756,
"grad_norm": 2.437437057495117,
"learning_rate": 7.408659977902474e-06,
"loss": 0.159,
"step": 8030
},
{
"epoch": 6.1036249762763335,
"grad_norm": 0.008021681569516659,
"learning_rate": 7.400913839909119e-06,
"loss": 0.0002,
"step": 8040
},
{
"epoch": 6.111216549629911,
"grad_norm": 0.0012970505049452186,
"learning_rate": 7.3931602053639414e-06,
"loss": 0.0527,
"step": 8050
},
{
"epoch": 6.118808122983489,
"grad_norm": 0.031485993415117264,
"learning_rate": 7.385399098476691e-06,
"loss": 0.0416,
"step": 8060
},
{
"epoch": 6.1263996963370655,
"grad_norm": 0.037826113402843475,
"learning_rate": 7.377630543480447e-06,
"loss": 0.0064,
"step": 8070
},
{
"epoch": 6.133991269690643,
"grad_norm": 0.007939423434436321,
"learning_rate": 7.369854564631549e-06,
"loss": 0.0004,
"step": 8080
},
{
"epoch": 6.141582843044221,
"grad_norm": 0.011576803401112556,
"learning_rate": 7.3620711862095116e-06,
"loss": 0.0003,
"step": 8090
},
{
"epoch": 6.149174416397798,
"grad_norm": 0.01118936575949192,
"learning_rate": 7.354280432516957e-06,
"loss": 0.0002,
"step": 8100
},
{
"epoch": 6.156765989751376,
"grad_norm": 0.001931383740156889,
"learning_rate": 7.346482327879535e-06,
"loss": 0.0009,
"step": 8110
},
{
"epoch": 6.164357563104954,
"grad_norm": 0.005506934132426977,
"learning_rate": 7.338676896645848e-06,
"loss": 0.0567,
"step": 8120
},
{
"epoch": 6.171949136458531,
"grad_norm": 0.07792196422815323,
"learning_rate": 7.330864163187372e-06,
"loss": 0.0003,
"step": 8130
},
{
"epoch": 6.179540709812109,
"grad_norm": 0.06636549532413483,
"learning_rate": 7.323044151898388e-06,
"loss": 0.0658,
"step": 8140
},
{
"epoch": 6.1871322831656865,
"grad_norm": 0.0012724515981972218,
"learning_rate": 7.3152168871959e-06,
"loss": 0.0605,
"step": 8150
},
{
"epoch": 6.194723856519263,
"grad_norm": 0.0033073413651436567,
"learning_rate": 7.307382393519556e-06,
"loss": 0.0732,
"step": 8160
},
{
"epoch": 6.202315429872841,
"grad_norm": 0.00361923361197114,
"learning_rate": 7.299540695331579e-06,
"loss": 0.0054,
"step": 8170
},
{
"epoch": 6.2099070032264185,
"grad_norm": 0.0007601641118526459,
"learning_rate": 7.291691817116686e-06,
"loss": 0.0001,
"step": 8180
},
{
"epoch": 6.217498576579996,
"grad_norm": 0.0025373934768140316,
"learning_rate": 7.283835783382015e-06,
"loss": 0.0567,
"step": 8190
},
{
"epoch": 6.225090149933574,
"grad_norm": 0.0037624204996973276,
"learning_rate": 7.275972618657041e-06,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 6.232681723287151,
"grad_norm": 0.002659817226231098,
"learning_rate": 7.268102347493511e-06,
"loss": 0.0727,
"step": 8210
},
{
"epoch": 6.240273296640729,
"grad_norm": 0.08516960591077805,
"learning_rate": 7.260224994465357e-06,
"loss": 0.001,
"step": 8220
},
{
"epoch": 6.247864869994307,
"grad_norm": 0.03827419877052307,
"learning_rate": 7.252340584168624e-06,
"loss": 0.0023,
"step": 8230
},
{
"epoch": 6.255456443347883,
"grad_norm": 0.0027726832777261734,
"learning_rate": 7.2444491412213914e-06,
"loss": 0.0536,
"step": 8240
},
{
"epoch": 6.263048016701461,
"grad_norm": 0.0064014289528131485,
"learning_rate": 7.236550690263702e-06,
"loss": 0.001,
"step": 8250
},
{
"epoch": 6.270639590055039,
"grad_norm": 0.005650675855576992,
"learning_rate": 7.228645255957472e-06,
"loss": 0.2206,
"step": 8260
},
{
"epoch": 6.278231163408616,
"grad_norm": 21.262990951538086,
"learning_rate": 7.2207328629864285e-06,
"loss": 0.0884,
"step": 8270
},
{
"epoch": 6.285822736762194,
"grad_norm": 0.03092315047979355,
"learning_rate": 7.212813536056025e-06,
"loss": 0.0684,
"step": 8280
},
{
"epoch": 6.293414310115772,
"grad_norm": 0.00995034258812666,
"learning_rate": 7.2048872998933665e-06,
"loss": 0.0003,
"step": 8290
},
{
"epoch": 6.301005883469349,
"grad_norm": 0.08173485100269318,
"learning_rate": 7.196954179247127e-06,
"loss": 0.0699,
"step": 8300
},
{
"epoch": 6.308597456822927,
"grad_norm": 0.15706369280815125,
"learning_rate": 7.189014198887478e-06,
"loss": 0.0419,
"step": 8310
},
{
"epoch": 6.3161890301765045,
"grad_norm": 0.44603389501571655,
"learning_rate": 7.181067383606015e-06,
"loss": 0.0374,
"step": 8320
},
{
"epoch": 6.323780603530081,
"grad_norm": 89.45038604736328,
"learning_rate": 7.173113758215667e-06,
"loss": 0.0231,
"step": 8330
},
{
"epoch": 6.331372176883659,
"grad_norm": 0.07431600242853165,
"learning_rate": 7.165153347550631e-06,
"loss": 0.007,
"step": 8340
},
{
"epoch": 6.3389637502372365,
"grad_norm": 0.00812879391014576,
"learning_rate": 7.15718617646629e-06,
"loss": 0.1122,
"step": 8350
},
{
"epoch": 6.346555323590814,
"grad_norm": 0.4049533009529114,
"learning_rate": 7.149212269839132e-06,
"loss": 0.0532,
"step": 8360
},
{
"epoch": 6.354146896944392,
"grad_norm": 0.403401255607605,
"learning_rate": 7.141231652566681e-06,
"loss": 0.0008,
"step": 8370
},
{
"epoch": 6.361738470297969,
"grad_norm": 0.8025851249694824,
"learning_rate": 7.133244349567411e-06,
"loss": 0.0221,
"step": 8380
},
{
"epoch": 6.369330043651547,
"grad_norm": 0.06498798727989197,
"learning_rate": 7.125250385780673e-06,
"loss": 0.0621,
"step": 8390
},
{
"epoch": 6.376921617005125,
"grad_norm": 0.0010519091738387942,
"learning_rate": 7.1172497861666124e-06,
"loss": 0.0404,
"step": 8400
},
{
"epoch": 6.384513190358702,
"grad_norm": 0.01423695683479309,
"learning_rate": 7.109242575706099e-06,
"loss": 0.0314,
"step": 8410
},
{
"epoch": 6.392104763712279,
"grad_norm": 0.8802148103713989,
"learning_rate": 7.10122877940064e-06,
"loss": 0.013,
"step": 8420
},
{
"epoch": 6.399696337065857,
"grad_norm": 0.037081677466630936,
"learning_rate": 7.093208422272309e-06,
"loss": 0.0005,
"step": 8430
},
{
"epoch": 6.407287910419434,
"grad_norm": 0.0005525704473257065,
"learning_rate": 7.085181529363661e-06,
"loss": 0.0972,
"step": 8440
},
{
"epoch": 6.414879483773012,
"grad_norm": 0.018398938700556755,
"learning_rate": 7.077148125737661e-06,
"loss": 0.1108,
"step": 8450
},
{
"epoch": 6.4224710571265895,
"grad_norm": 0.040173228830099106,
"learning_rate": 7.069108236477604e-06,
"loss": 0.0002,
"step": 8460
},
{
"epoch": 6.430062630480167,
"grad_norm": 0.009616430848836899,
"learning_rate": 7.061061886687035e-06,
"loss": 0.0013,
"step": 8470
},
{
"epoch": 6.437654203833745,
"grad_norm": 78.41429901123047,
"learning_rate": 7.053009101489667e-06,
"loss": 0.1232,
"step": 8480
},
{
"epoch": 6.445245777187322,
"grad_norm": 0.000696105882525444,
"learning_rate": 7.044949906029314e-06,
"loss": 0.0066,
"step": 8490
},
{
"epoch": 6.4528373505409,
"grad_norm": 0.012759624980390072,
"learning_rate": 7.036884325469797e-06,
"loss": 0.205,
"step": 8500
},
{
"epoch": 6.460428923894477,
"grad_norm": 51.840309143066406,
"learning_rate": 7.028812384994883e-06,
"loss": 0.1227,
"step": 8510
},
{
"epoch": 6.468020497248054,
"grad_norm": 0.0019890512339770794,
"learning_rate": 7.0207341098081875e-06,
"loss": 0.1419,
"step": 8520
},
{
"epoch": 6.475612070601632,
"grad_norm": 0.003854219801723957,
"learning_rate": 7.012649525133112e-06,
"loss": 0.0714,
"step": 8530
},
{
"epoch": 6.48320364395521,
"grad_norm": 0.06946977972984314,
"learning_rate": 7.004558656212754e-06,
"loss": 0.0004,
"step": 8540
},
{
"epoch": 6.490795217308787,
"grad_norm": 0.003731220494955778,
"learning_rate": 6.9964615283098405e-06,
"loss": 0.0017,
"step": 8550
},
{
"epoch": 6.498386790662365,
"grad_norm": 0.002791723469272256,
"learning_rate": 6.988358166706631e-06,
"loss": 0.0403,
"step": 8560
},
{
"epoch": 6.5059783640159425,
"grad_norm": 4.053121089935303,
"learning_rate": 6.980248596704856e-06,
"loss": 0.0008,
"step": 8570
},
{
"epoch": 6.51356993736952,
"grad_norm": 0.0038540286477655172,
"learning_rate": 6.97213284362563e-06,
"loss": 0.0003,
"step": 8580
},
{
"epoch": 6.521161510723097,
"grad_norm": 0.0033889245241880417,
"learning_rate": 6.96401093280937e-06,
"loss": 0.0505,
"step": 8590
},
{
"epoch": 6.5287530840766745,
"grad_norm": 0.0008385963155888021,
"learning_rate": 6.9558828896157225e-06,
"loss": 0.0001,
"step": 8600
},
{
"epoch": 6.536344657430252,
"grad_norm": 0.05049284175038338,
"learning_rate": 6.947748739423483e-06,
"loss": 0.0776,
"step": 8610
},
{
"epoch": 6.54393623078383,
"grad_norm": 0.014165320433676243,
"learning_rate": 6.939608507630513e-06,
"loss": 0.0339,
"step": 8620
},
{
"epoch": 6.551527804137407,
"grad_norm": 24.47572898864746,
"learning_rate": 6.931462219653662e-06,
"loss": 0.1604,
"step": 8630
},
{
"epoch": 6.559119377490985,
"grad_norm": 0.07809809595346451,
"learning_rate": 6.923309900928693e-06,
"loss": 0.0003,
"step": 8640
},
{
"epoch": 6.566710950844563,
"grad_norm": 0.08131968230009079,
"learning_rate": 6.915151576910194e-06,
"loss": 0.0097,
"step": 8650
},
{
"epoch": 6.57430252419814,
"grad_norm": 106.42731475830078,
"learning_rate": 6.906987273071509e-06,
"loss": 0.0111,
"step": 8660
},
{
"epoch": 6.581894097551718,
"grad_norm": 0.0046349032782018185,
"learning_rate": 6.898817014904653e-06,
"loss": 0.0601,
"step": 8670
},
{
"epoch": 6.589485670905296,
"grad_norm": 0.00192779372446239,
"learning_rate": 6.890640827920226e-06,
"loss": 0.0349,
"step": 8680
},
{
"epoch": 6.597077244258872,
"grad_norm": 0.0012624857481569052,
"learning_rate": 6.882458737647346e-06,
"loss": 0.0009,
"step": 8690
},
{
"epoch": 6.60466881761245,
"grad_norm": 0.00019073448493145406,
"learning_rate": 6.874270769633564e-06,
"loss": 0.0001,
"step": 8700
},
{
"epoch": 6.612260390966028,
"grad_norm": 0.03901955857872963,
"learning_rate": 6.866076949444781e-06,
"loss": 0.238,
"step": 8710
},
{
"epoch": 6.619851964319605,
"grad_norm": 0.05632855370640755,
"learning_rate": 6.857877302665169e-06,
"loss": 0.0435,
"step": 8720
},
{
"epoch": 6.627443537673183,
"grad_norm": 0.01720161736011505,
"learning_rate": 6.8496718548970956e-06,
"loss": 0.0208,
"step": 8730
},
{
"epoch": 6.6350351110267605,
"grad_norm": 0.00398442754521966,
"learning_rate": 6.8414606317610435e-06,
"loss": 0.0012,
"step": 8740
},
{
"epoch": 6.642626684380338,
"grad_norm": 0.02426181733608246,
"learning_rate": 6.833243658895521e-06,
"loss": 0.0004,
"step": 8750
},
{
"epoch": 6.650218257733916,
"grad_norm": 14.350150108337402,
"learning_rate": 6.825020961956995e-06,
"loss": 0.0823,
"step": 8760
},
{
"epoch": 6.6578098310874925,
"grad_norm": 0.0016744782915338874,
"learning_rate": 6.816792566619805e-06,
"loss": 0.1436,
"step": 8770
},
{
"epoch": 6.66540140444107,
"grad_norm": 0.020618196576833725,
"learning_rate": 6.808558498576081e-06,
"loss": 0.0006,
"step": 8780
},
{
"epoch": 6.672992977794648,
"grad_norm": 0.13271041214466095,
"learning_rate": 6.800318783535665e-06,
"loss": 0.0074,
"step": 8790
},
{
"epoch": 6.680584551148225,
"grad_norm": 0.020608441904187202,
"learning_rate": 6.792073447226034e-06,
"loss": 0.0002,
"step": 8800
},
{
"epoch": 6.688176124501803,
"grad_norm": 0.0014845712576061487,
"learning_rate": 6.7838225153922125e-06,
"loss": 0.0004,
"step": 8810
},
{
"epoch": 6.695767697855381,
"grad_norm": 0.06566622108221054,
"learning_rate": 6.775566013796699e-06,
"loss": 0.055,
"step": 8820
},
{
"epoch": 6.703359271208958,
"grad_norm": 0.13233526051044464,
"learning_rate": 6.767303968219383e-06,
"loss": 0.051,
"step": 8830
},
{
"epoch": 6.710950844562536,
"grad_norm": 12.247241020202637,
"learning_rate": 6.759036404457465e-06,
"loss": 0.171,
"step": 8840
},
{
"epoch": 6.7185424179161135,
"grad_norm": 0.06808517873287201,
"learning_rate": 6.750763348325371e-06,
"loss": 0.1818,
"step": 8850
},
{
"epoch": 6.726133991269691,
"grad_norm": 0.011621583253145218,
"learning_rate": 6.7424848256546825e-06,
"loss": 0.0119,
"step": 8860
},
{
"epoch": 6.733725564623268,
"grad_norm": 22.450834274291992,
"learning_rate": 6.734200862294045e-06,
"loss": 0.176,
"step": 8870
},
{
"epoch": 6.7413171379768455,
"grad_norm": 11.976455688476562,
"learning_rate": 6.725911484109094e-06,
"loss": 0.0507,
"step": 8880
},
{
"epoch": 6.748908711330423,
"grad_norm": 0.042554713785648346,
"learning_rate": 6.717616716982369e-06,
"loss": 0.0004,
"step": 8890
},
{
"epoch": 6.756500284684001,
"grad_norm": 0.0029066246934235096,
"learning_rate": 6.7093165868132415e-06,
"loss": 0.0066,
"step": 8900
},
{
"epoch": 6.764091858037578,
"grad_norm": 0.31371551752090454,
"learning_rate": 6.701011119517824e-06,
"loss": 0.0311,
"step": 8910
},
{
"epoch": 6.771683431391156,
"grad_norm": 0.025408325716853142,
"learning_rate": 6.692700341028893e-06,
"loss": 0.0002,
"step": 8920
},
{
"epoch": 6.779275004744734,
"grad_norm": 0.6896237730979919,
"learning_rate": 6.684384277295813e-06,
"loss": 0.003,
"step": 8930
},
{
"epoch": 6.78686657809831,
"grad_norm": 0.0014387418050318956,
"learning_rate": 6.676062954284447e-06,
"loss": 0.1432,
"step": 8940
},
{
"epoch": 6.794458151451888,
"grad_norm": 0.012326021678745747,
"learning_rate": 6.667736397977079e-06,
"loss": 0.0131,
"step": 8950
},
{
"epoch": 6.802049724805466,
"grad_norm": 0.010481426492333412,
"learning_rate": 6.659404634372338e-06,
"loss": 0.0027,
"step": 8960
},
{
"epoch": 6.809641298159043,
"grad_norm": 0.11520393937826157,
"learning_rate": 6.6510676894851065e-06,
"loss": 0.0008,
"step": 8970
},
{
"epoch": 6.817232871512621,
"grad_norm": 14.105742454528809,
"learning_rate": 6.6427255893464495e-06,
"loss": 0.1792,
"step": 8980
},
{
"epoch": 6.8248244448661985,
"grad_norm": 0.012812143191695213,
"learning_rate": 6.634378360003525e-06,
"loss": 0.0001,
"step": 8990
},
{
"epoch": 6.832416018219776,
"grad_norm": 0.0041709113866090775,
"learning_rate": 6.62602602751951e-06,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 6.840007591573354,
"grad_norm": 0.0038161997217684984,
"learning_rate": 6.6176686179735095e-06,
"loss": 0.0665,
"step": 9010
},
{
"epoch": 6.847599164926931,
"grad_norm": 0.30405986309051514,
"learning_rate": 6.6093061574604875e-06,
"loss": 0.0624,
"step": 9020
},
{
"epoch": 6.855190738280509,
"grad_norm": 0.001419481704942882,
"learning_rate": 6.600938672091178e-06,
"loss": 0.0001,
"step": 9030
},
{
"epoch": 6.862782311634086,
"grad_norm": 0.005425265524536371,
"learning_rate": 6.592566187992e-06,
"loss": 0.0115,
"step": 9040
},
{
"epoch": 6.870373884987663,
"grad_norm": 0.009964833967387676,
"learning_rate": 6.584188731304984e-06,
"loss": 0.0001,
"step": 9050
},
{
"epoch": 6.877965458341241,
"grad_norm": 17.450939178466797,
"learning_rate": 6.575806328187684e-06,
"loss": 0.0065,
"step": 9060
},
{
"epoch": 6.885557031694819,
"grad_norm": 0.5963069796562195,
"learning_rate": 6.567419004813105e-06,
"loss": 0.162,
"step": 9070
},
{
"epoch": 6.893148605048396,
"grad_norm": 0.002563629997894168,
"learning_rate": 6.559026787369608e-06,
"loss": 0.0006,
"step": 9080
},
{
"epoch": 6.900740178401974,
"grad_norm": 0.0032906217966228724,
"learning_rate": 6.550629702060836e-06,
"loss": 0.0576,
"step": 9090
},
{
"epoch": 6.908331751755552,
"grad_norm": 0.00252812379039824,
"learning_rate": 6.542227775105636e-06,
"loss": 0.0003,
"step": 9100
},
{
"epoch": 6.915923325109129,
"grad_norm": 0.13027949631214142,
"learning_rate": 6.533821032737968e-06,
"loss": 0.1393,
"step": 9110
},
{
"epoch": 6.923514898462706,
"grad_norm": 0.0013868529349565506,
"learning_rate": 6.525409501206828e-06,
"loss": 0.0003,
"step": 9120
},
{
"epoch": 6.931106471816284,
"grad_norm": 0.0035531616304069757,
"learning_rate": 6.516993206776167e-06,
"loss": 0.0516,
"step": 9130
},
{
"epoch": 6.938698045169861,
"grad_norm": 0.02282761037349701,
"learning_rate": 6.508572175724809e-06,
"loss": 0.0811,
"step": 9140
},
{
"epoch": 6.946289618523439,
"grad_norm": 29.90252685546875,
"learning_rate": 6.500146434346363e-06,
"loss": 0.065,
"step": 9150
},
{
"epoch": 6.9538811918770165,
"grad_norm": 0.14673539996147156,
"learning_rate": 6.4917160089491475e-06,
"loss": 0.0004,
"step": 9160
},
{
"epoch": 6.961472765230594,
"grad_norm": 2.630889892578125,
"learning_rate": 6.483280925856108e-06,
"loss": 0.039,
"step": 9170
},
{
"epoch": 6.969064338584172,
"grad_norm": 0.005536849144846201,
"learning_rate": 6.474841211404732e-06,
"loss": 0.0212,
"step": 9180
},
{
"epoch": 6.976655911937749,
"grad_norm": 13.343396186828613,
"learning_rate": 6.466396891946967e-06,
"loss": 0.1344,
"step": 9190
},
{
"epoch": 6.984247485291327,
"grad_norm": 13.473750114440918,
"learning_rate": 6.457947993849138e-06,
"loss": 0.0461,
"step": 9200
},
{
"epoch": 6.991839058644905,
"grad_norm": 0.10873476415872574,
"learning_rate": 6.4494945434918695e-06,
"loss": 0.0008,
"step": 9210
},
{
"epoch": 6.999430631998481,
"grad_norm": 0.08516258746385574,
"learning_rate": 6.441036567269999e-06,
"loss": 0.0006,
"step": 9220
},
{
"epoch": 6.999430631998481,
"eval_f1": 0.9574758853469025,
"eval_loss": 0.15473049879074097,
"eval_precision": 0.9565893515212521,
"eval_recall": 0.9575113808801214,
"eval_runtime": 75.8291,
"eval_samples_per_second": 17.381,
"eval_steps_per_second": 17.381,
"step": 9220
},
{
"epoch": 7.007022205352059,
"grad_norm": 0.4523492455482483,
"learning_rate": 6.432574091592495e-06,
"loss": 0.0557,
"step": 9230
},
{
"epoch": 7.014613778705637,
"grad_norm": 0.03357968479394913,
"learning_rate": 6.424107142882371e-06,
"loss": 0.0008,
"step": 9240
},
{
"epoch": 7.022205352059214,
"grad_norm": 46.4831657409668,
"learning_rate": 6.415635747576613e-06,
"loss": 0.0078,
"step": 9250
},
{
"epoch": 7.029796925412792,
"grad_norm": 0.010737122967839241,
"learning_rate": 6.40715993212609e-06,
"loss": 0.0003,
"step": 9260
},
{
"epoch": 7.0373884987663695,
"grad_norm": 21.10315704345703,
"learning_rate": 6.398679722995468e-06,
"loss": 0.1309,
"step": 9270
},
{
"epoch": 7.044980072119947,
"grad_norm": 0.010574131272733212,
"learning_rate": 6.3901951466631355e-06,
"loss": 0.0138,
"step": 9280
},
{
"epoch": 7.052571645473525,
"grad_norm": 0.0182713121175766,
"learning_rate": 6.381706229621117e-06,
"loss": 0.0002,
"step": 9290
},
{
"epoch": 7.0601632188271015,
"grad_norm": 0.10783802717924118,
"learning_rate": 6.373212998374989e-06,
"loss": 0.0337,
"step": 9300
},
{
"epoch": 7.067754792180679,
"grad_norm": 0.001446128822863102,
"learning_rate": 6.364715479443798e-06,
"loss": 0.0007,
"step": 9310
},
{
"epoch": 7.075346365534257,
"grad_norm": 0.00694943917915225,
"learning_rate": 6.356213699359982e-06,
"loss": 0.071,
"step": 9320
},
{
"epoch": 7.082937938887834,
"grad_norm": 0.09859494864940643,
"learning_rate": 6.347707684669278e-06,
"loss": 0.0005,
"step": 9330
},
{
"epoch": 7.090529512241412,
"grad_norm": 0.0008273068233393133,
"learning_rate": 6.33919746193065e-06,
"loss": 0.0,
"step": 9340
},
{
"epoch": 7.09812108559499,
"grad_norm": 0.0038316529244184494,
"learning_rate": 6.330683057716198e-06,
"loss": 0.0002,
"step": 9350
},
{
"epoch": 7.105712658948567,
"grad_norm": 0.0030708136036992073,
"learning_rate": 6.322164498611081e-06,
"loss": 0.0444,
"step": 9360
},
{
"epoch": 7.113304232302145,
"grad_norm": 0.0017414516769349575,
"learning_rate": 6.313641811213429e-06,
"loss": 0.0001,
"step": 9370
},
{
"epoch": 7.1208958056557226,
"grad_norm": 0.0035761166363954544,
"learning_rate": 6.305115022134262e-06,
"loss": 0.0001,
"step": 9380
},
{
"epoch": 7.128487379009299,
"grad_norm": 0.006457789335399866,
"learning_rate": 6.296584157997408e-06,
"loss": 0.0,
"step": 9390
},
{
"epoch": 7.136078952362877,
"grad_norm": 0.002314153825864196,
"learning_rate": 6.288049245439419e-06,
"loss": 0.0,
"step": 9400
},
{
"epoch": 7.1436705257164546,
"grad_norm": 0.008694717660546303,
"learning_rate": 6.279510311109487e-06,
"loss": 0.0001,
"step": 9410
},
{
"epoch": 7.151262099070032,
"grad_norm": 0.0009509180672466755,
"learning_rate": 6.270967381669362e-06,
"loss": 0.0001,
"step": 9420
},
{
"epoch": 7.15885367242361,
"grad_norm": 0.009006676264107227,
"learning_rate": 6.262420483793267e-06,
"loss": 0.0605,
"step": 9430
},
{
"epoch": 7.1664452457771874,
"grad_norm": 0.048271872103214264,
"learning_rate": 6.253869644167816e-06,
"loss": 0.3191,
"step": 9440
},
{
"epoch": 7.174036819130765,
"grad_norm": 0.001320886891335249,
"learning_rate": 6.245314889491933e-06,
"loss": 0.0066,
"step": 9450
},
{
"epoch": 7.181628392484343,
"grad_norm": 0.4799332916736603,
"learning_rate": 6.236756246476765e-06,
"loss": 0.0261,
"step": 9460
},
{
"epoch": 7.18921996583792,
"grad_norm": 1.649972677230835,
"learning_rate": 6.228193741845598e-06,
"loss": 0.001,
"step": 9470
},
{
"epoch": 7.196811539191497,
"grad_norm": 0.001544089405797422,
"learning_rate": 6.219627402333779e-06,
"loss": 0.0001,
"step": 9480
},
{
"epoch": 7.204403112545075,
"grad_norm": 0.0058356523513793945,
"learning_rate": 6.211057254688625e-06,
"loss": 0.0005,
"step": 9490
},
{
"epoch": 7.211994685898652,
"grad_norm": 0.001541537931188941,
"learning_rate": 6.202483325669345e-06,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 7.21958625925223,
"grad_norm": 0.002176716923713684,
"learning_rate": 6.193905642046957e-06,
"loss": 0.0472,
"step": 9510
},
{
"epoch": 7.227177832605808,
"grad_norm": 1.9937260150909424,
"learning_rate": 6.1853242306041995e-06,
"loss": 0.1573,
"step": 9520
},
{
"epoch": 7.234769405959385,
"grad_norm": 0.005575578194111586,
"learning_rate": 6.176739118135451e-06,
"loss": 0.004,
"step": 9530
},
{
"epoch": 7.242360979312963,
"grad_norm": 0.000481792347272858,
"learning_rate": 6.168150331446647e-06,
"loss": 0.0001,
"step": 9540
},
{
"epoch": 7.2499525526665405,
"grad_norm": 17.994680404663086,
"learning_rate": 6.159557897355198e-06,
"loss": 0.1026,
"step": 9550
},
{
"epoch": 7.257544126020118,
"grad_norm": 0.002096704440191388,
"learning_rate": 6.1509618426898934e-06,
"loss": 0.0004,
"step": 9560
},
{
"epoch": 7.265135699373695,
"grad_norm": 0.8841345906257629,
"learning_rate": 6.142362194290839e-06,
"loss": 0.0017,
"step": 9570
},
{
"epoch": 7.2727272727272725,
"grad_norm": 0.0021465634927153587,
"learning_rate": 6.133758979009355e-06,
"loss": 0.0001,
"step": 9580
},
{
"epoch": 7.28031884608085,
"grad_norm": 0.000766513985581696,
"learning_rate": 6.1251522237078996e-06,
"loss": 0.2186,
"step": 9590
},
{
"epoch": 7.287910419434428,
"grad_norm": 0.01812721975147724,
"learning_rate": 6.116541955259986e-06,
"loss": 0.0007,
"step": 9600
},
{
"epoch": 7.295501992788005,
"grad_norm": 0.00034479115856811404,
"learning_rate": 6.1079282005500965e-06,
"loss": 0.0055,
"step": 9610
},
{
"epoch": 7.303093566141583,
"grad_norm": 0.0008322893991135061,
"learning_rate": 6.099310986473595e-06,
"loss": 0.1915,
"step": 9620
},
{
"epoch": 7.310685139495161,
"grad_norm": 0.001017669215798378,
"learning_rate": 6.090690339936651e-06,
"loss": 0.0001,
"step": 9630
},
{
"epoch": 7.318276712848738,
"grad_norm": 0.003790239803493023,
"learning_rate": 6.082066287856152e-06,
"loss": 0.0001,
"step": 9640
},
{
"epoch": 7.325868286202315,
"grad_norm": 0.001801560982130468,
"learning_rate": 6.073438857159617e-06,
"loss": 0.0404,
"step": 9650
},
{
"epoch": 7.333459859555893,
"grad_norm": 0.0027911756187677383,
"learning_rate": 6.064808074785112e-06,
"loss": 0.0215,
"step": 9660
},
{
"epoch": 7.34105143290947,
"grad_norm": 0.001065615564584732,
"learning_rate": 6.056173967681172e-06,
"loss": 0.0,
"step": 9670
},
{
"epoch": 7.348643006263048,
"grad_norm": 0.0008436132338829339,
"learning_rate": 6.047536562806712e-06,
"loss": 0.0001,
"step": 9680
},
{
"epoch": 7.3562345796166255,
"grad_norm": 0.8050636053085327,
"learning_rate": 6.038895887130942e-06,
"loss": 0.068,
"step": 9690
},
{
"epoch": 7.363826152970203,
"grad_norm": 0.011237557046115398,
"learning_rate": 6.030251967633288e-06,
"loss": 0.0001,
"step": 9700
},
{
"epoch": 7.371417726323781,
"grad_norm": 0.0008242133189924061,
"learning_rate": 6.021604831303303e-06,
"loss": 0.0963,
"step": 9710
},
{
"epoch": 7.379009299677358,
"grad_norm": 0.9633244276046753,
"learning_rate": 6.012954505140582e-06,
"loss": 0.0032,
"step": 9720
},
{
"epoch": 7.386600873030936,
"grad_norm": 33.064613342285156,
"learning_rate": 6.004301016154683e-06,
"loss": 0.0926,
"step": 9730
},
{
"epoch": 7.394192446384513,
"grad_norm": 0.010244650766253471,
"learning_rate": 5.995644391365038e-06,
"loss": 0.0,
"step": 9740
},
{
"epoch": 7.40178401973809,
"grad_norm": 0.0010498914634808898,
"learning_rate": 5.98698465780087e-06,
"loss": 0.2515,
"step": 9750
},
{
"epoch": 7.409375593091668,
"grad_norm": 0.005540487356483936,
"learning_rate": 5.978321842501108e-06,
"loss": 0.0001,
"step": 9760
},
{
"epoch": 7.416967166445246,
"grad_norm": 0.001410833327099681,
"learning_rate": 5.9696559725143054e-06,
"loss": 0.0024,
"step": 9770
},
{
"epoch": 7.424558739798823,
"grad_norm": 0.11642355471849442,
"learning_rate": 5.960987074898553e-06,
"loss": 0.0004,
"step": 9780
},
{
"epoch": 7.432150313152401,
"grad_norm": 0.029217828065156937,
"learning_rate": 5.952315176721395e-06,
"loss": 0.0002,
"step": 9790
},
{
"epoch": 7.439741886505979,
"grad_norm": 0.057612184435129166,
"learning_rate": 5.943640305059742e-06,
"loss": 0.0455,
"step": 9800
},
{
"epoch": 7.447333459859556,
"grad_norm": 30.20539665222168,
"learning_rate": 5.9349624869997915e-06,
"loss": 0.152,
"step": 9810
},
{
"epoch": 7.454925033213134,
"grad_norm": 0.011167285032570362,
"learning_rate": 5.926281749636941e-06,
"loss": 0.0013,
"step": 9820
},
{
"epoch": 7.462516606566711,
"grad_norm": 0.01445252075791359,
"learning_rate": 5.9175981200757026e-06,
"loss": 0.0275,
"step": 9830
},
{
"epoch": 7.470108179920288,
"grad_norm": 0.0006470708176493645,
"learning_rate": 5.908911625429617e-06,
"loss": 0.0004,
"step": 9840
},
{
"epoch": 7.477699753273866,
"grad_norm": 0.010150356218218803,
"learning_rate": 5.900222292821173e-06,
"loss": 0.0572,
"step": 9850
},
{
"epoch": 7.4852913266274435,
"grad_norm": 0.05601394549012184,
"learning_rate": 5.89153014938172e-06,
"loss": 0.0004,
"step": 9860
},
{
"epoch": 7.492882899981021,
"grad_norm": 0.007213375996798277,
"learning_rate": 5.8828352222513866e-06,
"loss": 0.0184,
"step": 9870
},
{
"epoch": 7.500474473334599,
"grad_norm": 0.005943207535892725,
"learning_rate": 5.874137538578984e-06,
"loss": 0.0519,
"step": 9880
},
{
"epoch": 7.508066046688176,
"grad_norm": 0.005052383989095688,
"learning_rate": 5.865437125521943e-06,
"loss": 0.091,
"step": 9890
},
{
"epoch": 7.515657620041754,
"grad_norm": 0.000759047397878021,
"learning_rate": 5.856734010246207e-06,
"loss": 0.0,
"step": 9900
},
{
"epoch": 7.523249193395332,
"grad_norm": 0.004873152356594801,
"learning_rate": 5.848028219926162e-06,
"loss": 0.0001,
"step": 9910
},
{
"epoch": 7.530840766748908,
"grad_norm": 0.0005250478279776871,
"learning_rate": 5.839319781744543e-06,
"loss": 0.0,
"step": 9920
},
{
"epoch": 7.538432340102486,
"grad_norm": 0.0007055936730466783,
"learning_rate": 5.830608722892352e-06,
"loss": 0.0001,
"step": 9930
},
{
"epoch": 7.546023913456064,
"grad_norm": 0.0024068867787718773,
"learning_rate": 5.821895070568781e-06,
"loss": 0.0001,
"step": 9940
},
{
"epoch": 7.553615486809641,
"grad_norm": 183.14315795898438,
"learning_rate": 5.813178851981112e-06,
"loss": 0.1222,
"step": 9950
},
{
"epoch": 7.561207060163219,
"grad_norm": 0.8877391219139099,
"learning_rate": 5.804460094344642e-06,
"loss": 0.0002,
"step": 9960
},
{
"epoch": 7.5687986335167965,
"grad_norm": 0.006915534846484661,
"learning_rate": 5.795738824882596e-06,
"loss": 0.0001,
"step": 9970
},
{
"epoch": 7.576390206870374,
"grad_norm": 9.879432678222656,
"learning_rate": 5.787015070826044e-06,
"loss": 0.0076,
"step": 9980
},
{
"epoch": 7.583981780223952,
"grad_norm": 0.004392684902995825,
"learning_rate": 5.77828885941381e-06,
"loss": 0.0001,
"step": 9990
},
{
"epoch": 7.5915733535775285,
"grad_norm": 0.000951431633438915,
"learning_rate": 5.769560217892395e-06,
"loss": 0.0002,
"step": 10000
},
{
"epoch": 7.599164926931106,
"grad_norm": 0.0021181986667215824,
"learning_rate": 5.760829173515883e-06,
"loss": 0.0002,
"step": 10010
},
{
"epoch": 7.606756500284684,
"grad_norm": 0.006260419264435768,
"learning_rate": 5.752095753545864e-06,
"loss": 0.0577,
"step": 10020
},
{
"epoch": 7.614348073638261,
"grad_norm": 0.0006751982145942748,
"learning_rate": 5.743359985251348e-06,
"loss": 0.0292,
"step": 10030
},
{
"epoch": 7.621939646991839,
"grad_norm": 0.00024200859479606152,
"learning_rate": 5.734621895908668e-06,
"loss": 0.0238,
"step": 10040
},
{
"epoch": 7.629531220345417,
"grad_norm": 0.002035447396337986,
"learning_rate": 5.725881512801413e-06,
"loss": 0.0002,
"step": 10050
},
{
"epoch": 7.637122793698994,
"grad_norm": 0.0007019038312137127,
"learning_rate": 5.717138863220333e-06,
"loss": 0.0982,
"step": 10060
},
{
"epoch": 7.644714367052572,
"grad_norm": 0.0009322810219600797,
"learning_rate": 5.7083939744632514e-06,
"loss": 0.0001,
"step": 10070
},
{
"epoch": 7.6523059404061495,
"grad_norm": 0.011389588937163353,
"learning_rate": 5.699646873834983e-06,
"loss": 0.0691,
"step": 10080
},
{
"epoch": 7.659897513759727,
"grad_norm": 0.01710079051554203,
"learning_rate": 5.690897588647253e-06,
"loss": 0.0005,
"step": 10090
},
{
"epoch": 7.667489087113304,
"grad_norm": 9.926609992980957,
"learning_rate": 5.6821461462186045e-06,
"loss": 0.0352,
"step": 10100
},
{
"epoch": 7.6750806604668815,
"grad_norm": 0.00021108197688590735,
"learning_rate": 5.673392573874316e-06,
"loss": 0.0005,
"step": 10110
},
{
"epoch": 7.682672233820459,
"grad_norm": 0.001629292848519981,
"learning_rate": 5.6646368989463185e-06,
"loss": 0.0479,
"step": 10120
},
{
"epoch": 7.690263807174037,
"grad_norm": 0.12789593636989594,
"learning_rate": 5.655879148773107e-06,
"loss": 0.0183,
"step": 10130
},
{
"epoch": 7.697855380527614,
"grad_norm": 0.001387747353874147,
"learning_rate": 5.647119350699655e-06,
"loss": 0.0116,
"step": 10140
},
{
"epoch": 7.705446953881192,
"grad_norm": 0.0015600691549479961,
"learning_rate": 5.638357532077331e-06,
"loss": 0.0316,
"step": 10150
},
{
"epoch": 7.71303852723477,
"grad_norm": 0.0008326400420628488,
"learning_rate": 5.629593720263816e-06,
"loss": 0.0002,
"step": 10160
},
{
"epoch": 7.720630100588347,
"grad_norm": 0.023590516299009323,
"learning_rate": 5.620827942623008e-06,
"loss": 0.0008,
"step": 10170
},
{
"epoch": 7.728221673941924,
"grad_norm": 0.000754083099309355,
"learning_rate": 5.612060226524948e-06,
"loss": 0.0365,
"step": 10180
},
{
"epoch": 7.735813247295502,
"grad_norm": 0.011727853678166866,
"learning_rate": 5.603290599345726e-06,
"loss": 0.0438,
"step": 10190
},
{
"epoch": 7.743404820649079,
"grad_norm": 0.20062032341957092,
"learning_rate": 5.5945190884674065e-06,
"loss": 0.0056,
"step": 10200
},
{
"epoch": 7.750996394002657,
"grad_norm": 0.30250805616378784,
"learning_rate": 5.585745721277923e-06,
"loss": 0.1501,
"step": 10210
},
{
"epoch": 7.758587967356235,
"grad_norm": 0.00017410292639397085,
"learning_rate": 5.5769705251710175e-06,
"loss": 0.0002,
"step": 10220
},
{
"epoch": 7.766179540709812,
"grad_norm": 0.011902794241905212,
"learning_rate": 5.568193527546135e-06,
"loss": 0.0001,
"step": 10230
},
{
"epoch": 7.77377111406339,
"grad_norm": 0.3667079508304596,
"learning_rate": 5.559414755808348e-06,
"loss": 0.0394,
"step": 10240
},
{
"epoch": 7.7813626874169675,
"grad_norm": 0.001953916857019067,
"learning_rate": 5.550634237368269e-06,
"loss": 0.0006,
"step": 10250
},
{
"epoch": 7.788954260770545,
"grad_norm": 0.0013212488265708089,
"learning_rate": 5.541851999641964e-06,
"loss": 0.0004,
"step": 10260
},
{
"epoch": 7.796545834124123,
"grad_norm": 0.00039594716508872807,
"learning_rate": 5.533068070050867e-06,
"loss": 0.0322,
"step": 10270
},
{
"epoch": 7.8041374074776995,
"grad_norm": 0.000754969718400389,
"learning_rate": 5.524282476021692e-06,
"loss": 0.1497,
"step": 10280
},
{
"epoch": 7.811728980831277,
"grad_norm": 0.035513028502464294,
"learning_rate": 5.515495244986356e-06,
"loss": 0.0081,
"step": 10290
},
{
"epoch": 7.819320554184855,
"grad_norm": 0.0016785170882940292,
"learning_rate": 5.5067064043818815e-06,
"loss": 0.0001,
"step": 10300
},
{
"epoch": 7.826912127538432,
"grad_norm": 8.234527194872499e-05,
"learning_rate": 5.49791598165032e-06,
"loss": 0.0001,
"step": 10310
},
{
"epoch": 7.83450370089201,
"grad_norm": 0.0006789985345676541,
"learning_rate": 5.489124004238662e-06,
"loss": 0.0393,
"step": 10320
},
{
"epoch": 7.842095274245588,
"grad_norm": 0.0023299374151974916,
"learning_rate": 5.480330499598754e-06,
"loss": 0.0046,
"step": 10330
},
{
"epoch": 7.849686847599165,
"grad_norm": 0.007388091180473566,
"learning_rate": 5.471535495187207e-06,
"loss": 0.0001,
"step": 10340
},
{
"epoch": 7.857278420952742,
"grad_norm": 0.00018302824173588306,
"learning_rate": 5.462739018465318e-06,
"loss": 0.1768,
"step": 10350
},
{
"epoch": 7.86486999430632,
"grad_norm": 0.00418035127222538,
"learning_rate": 5.45394109689898e-06,
"loss": 0.0165,
"step": 10360
},
{
"epoch": 7.872461567659897,
"grad_norm": 0.01187161449342966,
"learning_rate": 5.445141757958599e-06,
"loss": 0.0004,
"step": 10370
},
{
"epoch": 7.880053141013475,
"grad_norm": 0.08545250445604324,
"learning_rate": 5.436341029119004e-06,
"loss": 0.06,
"step": 10380
},
{
"epoch": 7.8876447143670525,
"grad_norm": 0.004683859180659056,
"learning_rate": 5.427538937859368e-06,
"loss": 0.2187,
"step": 10390
},
{
"epoch": 7.89523628772063,
"grad_norm": 0.0011295732110738754,
"learning_rate": 5.418735511663112e-06,
"loss": 0.0002,
"step": 10400
},
{
"epoch": 7.902827861074208,
"grad_norm": 0.0021211670245975256,
"learning_rate": 5.409930778017828e-06,
"loss": 0.0425,
"step": 10410
},
{
"epoch": 7.910419434427785,
"grad_norm": 0.0004632298951037228,
"learning_rate": 5.401124764415192e-06,
"loss": 0.0001,
"step": 10420
},
{
"epoch": 7.918011007781363,
"grad_norm": 0.03465382754802704,
"learning_rate": 5.392317498350876e-06,
"loss": 0.07,
"step": 10430
},
{
"epoch": 7.925602581134941,
"grad_norm": 0.0012545166537165642,
"learning_rate": 5.38350900732446e-06,
"loss": 0.0003,
"step": 10440
},
{
"epoch": 7.933194154488517,
"grad_norm": 0.0008013694896362722,
"learning_rate": 5.374699318839352e-06,
"loss": 0.0001,
"step": 10450
},
{
"epoch": 7.940785727842095,
"grad_norm": 0.01796998642385006,
"learning_rate": 5.365888460402695e-06,
"loss": 0.0001,
"step": 10460
},
{
"epoch": 7.948377301195673,
"grad_norm": 0.06785059720277786,
"learning_rate": 5.357076459525291e-06,
"loss": 0.0002,
"step": 10470
},
{
"epoch": 7.95596887454925,
"grad_norm": 0.001381418784148991,
"learning_rate": 5.348263343721503e-06,
"loss": 0.0001,
"step": 10480
},
{
"epoch": 7.963560447902828,
"grad_norm": 0.06072179973125458,
"learning_rate": 5.339449140509179e-06,
"loss": 0.0002,
"step": 10490
},
{
"epoch": 7.9711520212564055,
"grad_norm": 0.024496397003531456,
"learning_rate": 5.330633877409561e-06,
"loss": 0.1215,
"step": 10500
},
{
"epoch": 7.978743594609983,
"grad_norm": 0.0315159372985363,
"learning_rate": 5.3218175819472e-06,
"loss": 0.0001,
"step": 10510
},
{
"epoch": 7.986335167963561,
"grad_norm": 0.0015373720088973641,
"learning_rate": 5.313000281649872e-06,
"loss": 0.0003,
"step": 10520
},
{
"epoch": 7.9939267413171375,
"grad_norm": 0.12398699671030045,
"learning_rate": 5.304182004048488e-06,
"loss": 0.0002,
"step": 10530
},
{
"epoch": 8.0,
"eval_f1": 0.9590113159486987,
"eval_loss": 0.15769141912460327,
"eval_precision": 0.9585736334342291,
"eval_recall": 0.9590288315629742,
"eval_runtime": 75.7332,
"eval_samples_per_second": 17.403,
"eval_steps_per_second": 17.403,
"step": 10538
}
],
"logging_steps": 10,
"max_steps": 19755,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.701261509159456e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}