intent_classifier / trainer_state.json
chidamnat2002's picture
Upload 14 files
86ac113 verified
{
"best_metric": 0.2595302164554596,
"best_model_checkpoint": "distilbert-base-uncased-lora-intent-classification-v2/checkpoint-67716",
"epoch": 9.0,
"eval_steps": 500,
"global_step": 67716,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06645401382243488,
"grad_norm": 4.6704421043396,
"learning_rate": 0.0009933545986177566,
"loss": 0.6675,
"step": 500
},
{
"epoch": 0.13290802764486975,
"grad_norm": 2.3022220134735107,
"learning_rate": 0.000986709197235513,
"loss": 0.4718,
"step": 1000
},
{
"epoch": 0.19936204146730463,
"grad_norm": 0.44215622544288635,
"learning_rate": 0.0009800637958532696,
"loss": 0.4146,
"step": 1500
},
{
"epoch": 0.2658160552897395,
"grad_norm": 0.08581192046403885,
"learning_rate": 0.0009734183944710261,
"loss": 0.4297,
"step": 2000
},
{
"epoch": 0.3322700691121744,
"grad_norm": 13.087315559387207,
"learning_rate": 0.0009667729930887826,
"loss": 0.3776,
"step": 2500
},
{
"epoch": 0.39872408293460926,
"grad_norm": 15.066133499145508,
"learning_rate": 0.0009601275917065391,
"loss": 0.4233,
"step": 3000
},
{
"epoch": 0.46517809675704413,
"grad_norm": 0.23827387392520905,
"learning_rate": 0.0009534821903242956,
"loss": 0.3613,
"step": 3500
},
{
"epoch": 0.531632110579479,
"grad_norm": 0.009319925680756569,
"learning_rate": 0.0009468367889420521,
"loss": 0.4269,
"step": 4000
},
{
"epoch": 0.5980861244019139,
"grad_norm": 0.665321946144104,
"learning_rate": 0.0009401913875598086,
"loss": 0.3815,
"step": 4500
},
{
"epoch": 0.6645401382243488,
"grad_norm": 3.580693483352661,
"learning_rate": 0.0009335459861775651,
"loss": 0.3539,
"step": 5000
},
{
"epoch": 0.7309941520467836,
"grad_norm": 0.12289135903120041,
"learning_rate": 0.0009269005847953217,
"loss": 0.4112,
"step": 5500
},
{
"epoch": 0.7974481658692185,
"grad_norm": 1.3471044301986694,
"learning_rate": 0.0009202551834130782,
"loss": 0.4109,
"step": 6000
},
{
"epoch": 0.8639021796916534,
"grad_norm": 0.09887880831956863,
"learning_rate": 0.0009136097820308346,
"loss": 0.4508,
"step": 6500
},
{
"epoch": 0.9303561935140883,
"grad_norm": 0.005311007611453533,
"learning_rate": 0.0009069643806485912,
"loss": 0.4011,
"step": 7000
},
{
"epoch": 0.9968102073365231,
"grad_norm": 1.1049816608428955,
"learning_rate": 0.0009003189792663478,
"loss": 0.368,
"step": 7500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9425867507886435,
"eval_f1": 0.9421244141375861,
"eval_loss": 0.3986539840698242,
"eval_precision": 0.9421379340931425,
"eval_recall": 0.9425867507886435,
"eval_runtime": 4.728,
"eval_samples_per_second": 335.238,
"eval_steps_per_second": 83.968,
"step": 7524
},
{
"epoch": 1.063264221158958,
"grad_norm": 70.09782409667969,
"learning_rate": 0.0008936735778841042,
"loss": 0.3306,
"step": 8000
},
{
"epoch": 1.1297182349813928,
"grad_norm": 0.7961419820785522,
"learning_rate": 0.0008870281765018608,
"loss": 0.3746,
"step": 8500
},
{
"epoch": 1.1961722488038278,
"grad_norm": 0.060738347470760345,
"learning_rate": 0.0008803827751196173,
"loss": 0.4045,
"step": 9000
},
{
"epoch": 1.2626262626262625,
"grad_norm": 0.20715029537677765,
"learning_rate": 0.0008737373737373737,
"loss": 0.4587,
"step": 9500
},
{
"epoch": 1.3290802764486975,
"grad_norm": 0.08913299441337585,
"learning_rate": 0.0008670919723551303,
"loss": 0.4504,
"step": 10000
},
{
"epoch": 1.3955342902711323,
"grad_norm": 0.14319421350955963,
"learning_rate": 0.0008604465709728868,
"loss": 0.3991,
"step": 10500
},
{
"epoch": 1.4619883040935673,
"grad_norm": 2.545884370803833,
"learning_rate": 0.0008538011695906432,
"loss": 0.4192,
"step": 11000
},
{
"epoch": 1.528442317916002,
"grad_norm": 0.12403066456317902,
"learning_rate": 0.0008471557682083998,
"loss": 0.3563,
"step": 11500
},
{
"epoch": 1.594896331738437,
"grad_norm": 41.519954681396484,
"learning_rate": 0.0008405103668261563,
"loss": 0.3435,
"step": 12000
},
{
"epoch": 1.661350345560872,
"grad_norm": 83.61852264404297,
"learning_rate": 0.0008338649654439129,
"loss": 0.3503,
"step": 12500
},
{
"epoch": 1.7278043593833068,
"grad_norm": 0.001769404741935432,
"learning_rate": 0.0008272195640616694,
"loss": 0.3238,
"step": 13000
},
{
"epoch": 1.7942583732057416,
"grad_norm": 1.7677043676376343,
"learning_rate": 0.0008205741626794258,
"loss": 0.38,
"step": 13500
},
{
"epoch": 1.8607123870281765,
"grad_norm": 1.0566127300262451,
"learning_rate": 0.0008139287612971824,
"loss": 0.4146,
"step": 14000
},
{
"epoch": 1.9271664008506115,
"grad_norm": 19.463109970092773,
"learning_rate": 0.0008072833599149389,
"loss": 0.4305,
"step": 14500
},
{
"epoch": 1.9936204146730463,
"grad_norm": 17.069889068603516,
"learning_rate": 0.0008006379585326954,
"loss": 0.3505,
"step": 15000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9482649842271293,
"eval_f1": 0.9478124684113843,
"eval_loss": 0.3766539990901947,
"eval_precision": 0.9481744874506283,
"eval_recall": 0.9482649842271293,
"eval_runtime": 4.5607,
"eval_samples_per_second": 347.537,
"eval_steps_per_second": 87.049,
"step": 15048
},
{
"epoch": 2.060074428495481,
"grad_norm": 0.4118238389492035,
"learning_rate": 0.000793992557150452,
"loss": 0.3021,
"step": 15500
},
{
"epoch": 2.126528442317916,
"grad_norm": 0.4119320213794708,
"learning_rate": 0.0007873471557682083,
"loss": 0.3166,
"step": 16000
},
{
"epoch": 2.192982456140351,
"grad_norm": 10.00361442565918,
"learning_rate": 0.0007807017543859649,
"loss": 0.374,
"step": 16500
},
{
"epoch": 2.2594364699627856,
"grad_norm": 44.608726501464844,
"learning_rate": 0.0007740563530037215,
"loss": 0.4748,
"step": 17000
},
{
"epoch": 2.3258904837852206,
"grad_norm": 0.09617531299591064,
"learning_rate": 0.000767410951621478,
"loss": 0.3771,
"step": 17500
},
{
"epoch": 2.3923444976076556,
"grad_norm": 26.71993064880371,
"learning_rate": 0.0007607655502392344,
"loss": 0.4181,
"step": 18000
},
{
"epoch": 2.4587985114300905,
"grad_norm": 0.003970532212406397,
"learning_rate": 0.000754120148856991,
"loss": 0.3365,
"step": 18500
},
{
"epoch": 2.525252525252525,
"grad_norm": 0.023912647739052773,
"learning_rate": 0.0007474747474747475,
"loss": 0.3731,
"step": 19000
},
{
"epoch": 2.59170653907496,
"grad_norm": 0.08333996683359146,
"learning_rate": 0.000740829346092504,
"loss": 0.4489,
"step": 19500
},
{
"epoch": 2.658160552897395,
"grad_norm": 0.01645304262638092,
"learning_rate": 0.0007341839447102606,
"loss": 0.4246,
"step": 20000
},
{
"epoch": 2.72461456671983,
"grad_norm": 0.08779849112033844,
"learning_rate": 0.000727538543328017,
"loss": 0.4556,
"step": 20500
},
{
"epoch": 2.7910685805422646,
"grad_norm": 52.66293716430664,
"learning_rate": 0.0007208931419457735,
"loss": 0.3538,
"step": 21000
},
{
"epoch": 2.8575225943646996,
"grad_norm": 0.028336428105831146,
"learning_rate": 0.00071424774056353,
"loss": 0.3813,
"step": 21500
},
{
"epoch": 2.9239766081871346,
"grad_norm": 0.30558499693870544,
"learning_rate": 0.0007076023391812866,
"loss": 0.4138,
"step": 22000
},
{
"epoch": 2.990430622009569,
"grad_norm": 30.89914321899414,
"learning_rate": 0.0007009569377990431,
"loss": 0.3391,
"step": 22500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9539432176656152,
"eval_f1": 0.95367799565447,
"eval_loss": 0.34262794256210327,
"eval_precision": 0.9535465559361256,
"eval_recall": 0.9539432176656152,
"eval_runtime": 4.5296,
"eval_samples_per_second": 349.923,
"eval_steps_per_second": 87.646,
"step": 22572
},
{
"epoch": 3.056884635832004,
"grad_norm": 280.99310302734375,
"learning_rate": 0.0006943115364167995,
"loss": 0.3269,
"step": 23000
},
{
"epoch": 3.123338649654439,
"grad_norm": 0.030926929786801338,
"learning_rate": 0.0006876661350345561,
"loss": 0.3015,
"step": 23500
},
{
"epoch": 3.189792663476874,
"grad_norm": 0.1642533391714096,
"learning_rate": 0.0006810207336523127,
"loss": 0.3959,
"step": 24000
},
{
"epoch": 3.256246677299309,
"grad_norm": 4.198115825653076,
"learning_rate": 0.000674375332270069,
"loss": 0.4014,
"step": 24500
},
{
"epoch": 3.3227006911217436,
"grad_norm": 0.007642796263098717,
"learning_rate": 0.0006677299308878256,
"loss": 0.3203,
"step": 25000
},
{
"epoch": 3.3891547049441786,
"grad_norm": 0.018859192728996277,
"learning_rate": 0.0006610845295055822,
"loss": 0.3617,
"step": 25500
},
{
"epoch": 3.4556087187666136,
"grad_norm": 0.1555991768836975,
"learning_rate": 0.0006544391281233386,
"loss": 0.34,
"step": 26000
},
{
"epoch": 3.522062732589048,
"grad_norm": 0.03736409544944763,
"learning_rate": 0.0006477937267410952,
"loss": 0.3342,
"step": 26500
},
{
"epoch": 3.588516746411483,
"grad_norm": 0.0046156104654073715,
"learning_rate": 0.0006411483253588518,
"loss": 0.3961,
"step": 27000
},
{
"epoch": 3.654970760233918,
"grad_norm": 27.846786499023438,
"learning_rate": 0.0006345029239766082,
"loss": 0.2895,
"step": 27500
},
{
"epoch": 3.721424774056353,
"grad_norm": 19.202760696411133,
"learning_rate": 0.0006278575225943647,
"loss": 0.4071,
"step": 28000
},
{
"epoch": 3.787878787878788,
"grad_norm": 0.007552656345069408,
"learning_rate": 0.0006212121212121212,
"loss": 0.3859,
"step": 28500
},
{
"epoch": 3.8543328017012226,
"grad_norm": 0.029448220506310463,
"learning_rate": 0.0006145667198298778,
"loss": 0.3642,
"step": 29000
},
{
"epoch": 3.9207868155236576,
"grad_norm": 2.9489197731018066,
"learning_rate": 0.0006079213184476342,
"loss": 0.3331,
"step": 29500
},
{
"epoch": 3.9872408293460926,
"grad_norm": 0.13416582345962524,
"learning_rate": 0.0006012759170653907,
"loss": 0.3399,
"step": 30000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9533123028391167,
"eval_f1": 0.9528581216338866,
"eval_loss": 0.36635637283325195,
"eval_precision": 0.9528819559731596,
"eval_recall": 0.9533123028391167,
"eval_runtime": 4.1925,
"eval_samples_per_second": 378.06,
"eval_steps_per_second": 94.694,
"step": 30096
},
{
"epoch": 4.053694843168527,
"grad_norm": 28.457218170166016,
"learning_rate": 0.0005946305156831473,
"loss": 0.3025,
"step": 30500
},
{
"epoch": 4.120148856990962,
"grad_norm": 6.5367112159729,
"learning_rate": 0.0005879851143009038,
"loss": 0.314,
"step": 31000
},
{
"epoch": 4.186602870813397,
"grad_norm": 393.4518737792969,
"learning_rate": 0.0005813397129186602,
"loss": 0.3436,
"step": 31500
},
{
"epoch": 4.253056884635832,
"grad_norm": 0.9848179221153259,
"learning_rate": 0.0005746943115364168,
"loss": 0.2768,
"step": 32000
},
{
"epoch": 4.319510898458267,
"grad_norm": 2.0531139373779297,
"learning_rate": 0.0005680489101541734,
"loss": 0.3134,
"step": 32500
},
{
"epoch": 4.385964912280702,
"grad_norm": 0.055749546736478806,
"learning_rate": 0.0005614035087719298,
"loss": 0.3532,
"step": 33000
},
{
"epoch": 4.452418926103137,
"grad_norm": 0.4778645634651184,
"learning_rate": 0.0005547581073896864,
"loss": 0.3622,
"step": 33500
},
{
"epoch": 4.518872939925571,
"grad_norm": 0.061856046319007874,
"learning_rate": 0.0005481127060074428,
"loss": 0.3426,
"step": 34000
},
{
"epoch": 4.585326953748006,
"grad_norm": 0.026136351749300957,
"learning_rate": 0.0005414673046251993,
"loss": 0.3795,
"step": 34500
},
{
"epoch": 4.651780967570441,
"grad_norm": 0.03556622937321663,
"learning_rate": 0.0005348219032429559,
"loss": 0.3322,
"step": 35000
},
{
"epoch": 4.718234981392876,
"grad_norm": 0.14081618189811707,
"learning_rate": 0.0005281765018607124,
"loss": 0.3722,
"step": 35500
},
{
"epoch": 4.784688995215311,
"grad_norm": 100.0813217163086,
"learning_rate": 0.0005215311004784689,
"loss": 0.3467,
"step": 36000
},
{
"epoch": 4.851143009037746,
"grad_norm": 9.537514686584473,
"learning_rate": 0.0005148856990962254,
"loss": 0.3484,
"step": 36500
},
{
"epoch": 4.917597022860181,
"grad_norm": 0.048729896545410156,
"learning_rate": 0.0005082402977139819,
"loss": 0.3439,
"step": 37000
},
{
"epoch": 4.984051036682615,
"grad_norm": 0.005286164116114378,
"learning_rate": 0.0005015948963317385,
"loss": 0.3023,
"step": 37500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9570977917981073,
"eval_f1": 0.9568038885748729,
"eval_loss": 0.3057607114315033,
"eval_precision": 0.9566095910966326,
"eval_recall": 0.9570977917981073,
"eval_runtime": 4.2904,
"eval_samples_per_second": 369.428,
"eval_steps_per_second": 92.532,
"step": 37620
},
{
"epoch": 5.05050505050505,
"grad_norm": 123.33903503417969,
"learning_rate": 0.000494949494949495,
"loss": 0.3801,
"step": 38000
},
{
"epoch": 5.116959064327485,
"grad_norm": 0.005817115306854248,
"learning_rate": 0.0004883040935672514,
"loss": 0.3047,
"step": 38500
},
{
"epoch": 5.18341307814992,
"grad_norm": 0.16751976311206818,
"learning_rate": 0.000481658692185008,
"loss": 0.4044,
"step": 39000
},
{
"epoch": 5.249867091972355,
"grad_norm": 60.48826599121094,
"learning_rate": 0.0004750132908027645,
"loss": 0.3485,
"step": 39500
},
{
"epoch": 5.31632110579479,
"grad_norm": 157.16188049316406,
"learning_rate": 0.000468367889420521,
"loss": 0.3368,
"step": 40000
},
{
"epoch": 5.382775119617225,
"grad_norm": 45.994049072265625,
"learning_rate": 0.00046172248803827756,
"loss": 0.3816,
"step": 40500
},
{
"epoch": 5.44922913343966,
"grad_norm": 15.62516975402832,
"learning_rate": 0.00045507708665603404,
"loss": 0.324,
"step": 41000
},
{
"epoch": 5.515683147262095,
"grad_norm": 289.2982177734375,
"learning_rate": 0.0004484316852737905,
"loss": 0.3031,
"step": 41500
},
{
"epoch": 5.582137161084529,
"grad_norm": 0.027738776057958603,
"learning_rate": 0.00044178628389154705,
"loss": 0.3392,
"step": 42000
},
{
"epoch": 5.648591174906964,
"grad_norm": 0.02977157197892666,
"learning_rate": 0.0004351408825093036,
"loss": 0.3477,
"step": 42500
},
{
"epoch": 5.715045188729399,
"grad_norm": 1.663713812828064,
"learning_rate": 0.0004284954811270601,
"loss": 0.3993,
"step": 43000
},
{
"epoch": 5.781499202551834,
"grad_norm": 2.4411869049072266,
"learning_rate": 0.0004218500797448166,
"loss": 0.422,
"step": 43500
},
{
"epoch": 5.847953216374269,
"grad_norm": 12.378539085388184,
"learning_rate": 0.0004152046783625731,
"loss": 0.3649,
"step": 44000
},
{
"epoch": 5.914407230196704,
"grad_norm": 82.05158996582031,
"learning_rate": 0.00040855927698032964,
"loss": 0.4191,
"step": 44500
},
{
"epoch": 5.980861244019139,
"grad_norm": 0.008256383240222931,
"learning_rate": 0.0004019138755980861,
"loss": 0.3437,
"step": 45000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9501577287066246,
"eval_f1": 0.9497240205967022,
"eval_loss": 0.31248244643211365,
"eval_precision": 0.949826651119135,
"eval_recall": 0.9501577287066246,
"eval_runtime": 4.1272,
"eval_samples_per_second": 384.034,
"eval_steps_per_second": 96.19,
"step": 45144
},
{
"epoch": 6.047315257841573,
"grad_norm": 0.22720667719841003,
"learning_rate": 0.00039526847421584264,
"loss": 0.3774,
"step": 45500
},
{
"epoch": 6.113769271664008,
"grad_norm": 0.1796969771385193,
"learning_rate": 0.0003886230728335992,
"loss": 0.3625,
"step": 46000
},
{
"epoch": 6.180223285486443,
"grad_norm": 0.06664836406707764,
"learning_rate": 0.00038197767145135565,
"loss": 0.3096,
"step": 46500
},
{
"epoch": 6.246677299308878,
"grad_norm": 52.87346267700195,
"learning_rate": 0.0003753322700691122,
"loss": 0.324,
"step": 47000
},
{
"epoch": 6.313131313131313,
"grad_norm": 0.13641533255577087,
"learning_rate": 0.0003686868686868687,
"loss": 0.3824,
"step": 47500
},
{
"epoch": 6.379585326953748,
"grad_norm": 0.014752733521163464,
"learning_rate": 0.00036204146730462524,
"loss": 0.3576,
"step": 48000
},
{
"epoch": 6.446039340776183,
"grad_norm": 0.07991009950637817,
"learning_rate": 0.0003553960659223817,
"loss": 0.2889,
"step": 48500
},
{
"epoch": 6.512493354598618,
"grad_norm": 0.0857154056429863,
"learning_rate": 0.0003487506645401382,
"loss": 0.3496,
"step": 49000
},
{
"epoch": 6.578947368421053,
"grad_norm": 22.04611587524414,
"learning_rate": 0.00034210526315789477,
"loss": 0.3456,
"step": 49500
},
{
"epoch": 6.645401382243487,
"grad_norm": 0.3360465466976166,
"learning_rate": 0.00033545986177565125,
"loss": 0.3113,
"step": 50000
},
{
"epoch": 6.711855396065922,
"grad_norm": 0.011091183871030807,
"learning_rate": 0.0003288144603934078,
"loss": 0.3085,
"step": 50500
},
{
"epoch": 6.778309409888357,
"grad_norm": 45.16307830810547,
"learning_rate": 0.00032216905901116425,
"loss": 0.261,
"step": 51000
},
{
"epoch": 6.844763423710792,
"grad_norm": 0.10898467898368835,
"learning_rate": 0.0003155236576289208,
"loss": 0.2772,
"step": 51500
},
{
"epoch": 6.911217437533227,
"grad_norm": 0.04280232638120651,
"learning_rate": 0.0003088782562466773,
"loss": 0.3664,
"step": 52000
},
{
"epoch": 6.977671451355662,
"grad_norm": 0.44427451491355896,
"learning_rate": 0.0003022328548644338,
"loss": 0.2981,
"step": 52500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9570977917981073,
"eval_f1": 0.9567609606627793,
"eval_loss": 0.3381944000720978,
"eval_precision": 0.9567551880330806,
"eval_recall": 0.9570977917981073,
"eval_runtime": 4.1238,
"eval_samples_per_second": 384.357,
"eval_steps_per_second": 96.271,
"step": 52668
},
{
"epoch": 7.044125465178097,
"grad_norm": 12.310619354248047,
"learning_rate": 0.00029558745348219037,
"loss": 0.2961,
"step": 53000
},
{
"epoch": 7.110579479000531,
"grad_norm": 0.021439863368868828,
"learning_rate": 0.00028894205209994685,
"loss": 0.3132,
"step": 53500
},
{
"epoch": 7.177033492822966,
"grad_norm": 12.506621360778809,
"learning_rate": 0.0002822966507177033,
"loss": 0.3065,
"step": 54000
},
{
"epoch": 7.243487506645401,
"grad_norm": 40.974212646484375,
"learning_rate": 0.00027565124933545985,
"loss": 0.3052,
"step": 54500
},
{
"epoch": 7.309941520467836,
"grad_norm": 17.352012634277344,
"learning_rate": 0.0002690058479532164,
"loss": 0.3074,
"step": 55000
},
{
"epoch": 7.376395534290271,
"grad_norm": 7.186513423919678,
"learning_rate": 0.0002623604465709729,
"loss": 0.2944,
"step": 55500
},
{
"epoch": 7.442849548112706,
"grad_norm": 0.11422441154718399,
"learning_rate": 0.0002557150451887294,
"loss": 0.3277,
"step": 56000
},
{
"epoch": 7.509303561935141,
"grad_norm": 0.4097649157047272,
"learning_rate": 0.0002490696438064859,
"loss": 0.3314,
"step": 56500
},
{
"epoch": 7.575757575757576,
"grad_norm": 255.17686462402344,
"learning_rate": 0.00024242424242424245,
"loss": 0.3849,
"step": 57000
},
{
"epoch": 7.642211589580011,
"grad_norm": 0.11329037696123123,
"learning_rate": 0.00023577884104199895,
"loss": 0.3603,
"step": 57500
},
{
"epoch": 7.708665603402445,
"grad_norm": 0.04299360513687134,
"learning_rate": 0.00022913343965975545,
"loss": 0.3467,
"step": 58000
},
{
"epoch": 7.77511961722488,
"grad_norm": 0.04895203933119774,
"learning_rate": 0.00022248803827751195,
"loss": 0.3428,
"step": 58500
},
{
"epoch": 7.841573631047315,
"grad_norm": 0.07165663689374924,
"learning_rate": 0.00021584263689526848,
"loss": 0.2874,
"step": 59000
},
{
"epoch": 7.90802764486975,
"grad_norm": 0.10646966099739075,
"learning_rate": 0.00020919723551302499,
"loss": 0.2834,
"step": 59500
},
{
"epoch": 7.974481658692185,
"grad_norm": 0.022936491295695305,
"learning_rate": 0.00020255183413078152,
"loss": 0.2899,
"step": 60000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9577287066246056,
"eval_f1": 0.9575092656624108,
"eval_loss": 0.30500882863998413,
"eval_precision": 0.9575766504306299,
"eval_recall": 0.9577287066246056,
"eval_runtime": 4.5012,
"eval_samples_per_second": 352.132,
"eval_steps_per_second": 88.2,
"step": 60192
},
{
"epoch": 8.04093567251462,
"grad_norm": 0.4371676743030548,
"learning_rate": 0.00019590643274853802,
"loss": 0.3231,
"step": 60500
},
{
"epoch": 8.107389686337054,
"grad_norm": 0.000947824795730412,
"learning_rate": 0.00018926103136629452,
"loss": 0.3014,
"step": 61000
},
{
"epoch": 8.17384370015949,
"grad_norm": 0.06363413482904434,
"learning_rate": 0.00018261562998405105,
"loss": 0.2293,
"step": 61500
},
{
"epoch": 8.240297713981924,
"grad_norm": 1.2114511728286743,
"learning_rate": 0.00017597022860180755,
"loss": 0.2808,
"step": 62000
},
{
"epoch": 8.30675172780436,
"grad_norm": 23.535938262939453,
"learning_rate": 0.00016932482721956408,
"loss": 0.2595,
"step": 62500
},
{
"epoch": 8.373205741626794,
"grad_norm": 60.49204635620117,
"learning_rate": 0.00016267942583732056,
"loss": 0.3388,
"step": 63000
},
{
"epoch": 8.43965975544923,
"grad_norm": 14.233682632446289,
"learning_rate": 0.0001560340244550771,
"loss": 0.3423,
"step": 63500
},
{
"epoch": 8.506113769271664,
"grad_norm": 0.015386885032057762,
"learning_rate": 0.0001493886230728336,
"loss": 0.316,
"step": 64000
},
{
"epoch": 8.5725677830941,
"grad_norm": 0.3906301259994507,
"learning_rate": 0.00014274322169059012,
"loss": 0.3165,
"step": 64500
},
{
"epoch": 8.639021796916534,
"grad_norm": 0.0586216077208519,
"learning_rate": 0.00013609782030834665,
"loss": 0.3013,
"step": 65000
},
{
"epoch": 8.70547581073897,
"grad_norm": 0.006104405503720045,
"learning_rate": 0.00012945241892610312,
"loss": 0.2352,
"step": 65500
},
{
"epoch": 8.771929824561404,
"grad_norm": 0.02979845367372036,
"learning_rate": 0.00012280701754385965,
"loss": 0.2203,
"step": 66000
},
{
"epoch": 8.83838383838384,
"grad_norm": 0.08639369904994965,
"learning_rate": 0.00011616161616161616,
"loss": 0.2643,
"step": 66500
},
{
"epoch": 8.904837852206274,
"grad_norm": 32.0872802734375,
"learning_rate": 0.00010951621477937269,
"loss": 0.2658,
"step": 67000
},
{
"epoch": 8.971291866028707,
"grad_norm": 0.011845378205180168,
"learning_rate": 0.00010287081339712919,
"loss": 0.2795,
"step": 67500
},
{
"epoch": 9.0,
"eval_accuracy": 0.9646687697160883,
"eval_f1": 0.9644253672098426,
"eval_loss": 0.2595302164554596,
"eval_precision": 0.9644475825303181,
"eval_recall": 0.9646687697160883,
"eval_runtime": 4.3195,
"eval_samples_per_second": 366.941,
"eval_steps_per_second": 91.909,
"step": 67716
}
],
"logging_steps": 500,
"max_steps": 75240,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2551274670587520.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}