Whisper-Base-KhmerV2 / trainer_state.json
{
"best_metric": 0.4528582034149963,
"best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-base-khmer\\checkpoint-4400",
"epoch": 2.962962962962963,
"eval_steps": 400,
"global_step": 4400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006734006734006734,
"grad_norm": 22.712024688720703,
"learning_rate": 7.000000000000001e-06,
"loss": 2.8858,
"step": 10
},
{
"epoch": 0.013468013468013467,
"grad_norm": 17.574142456054688,
"learning_rate": 1.7000000000000003e-05,
"loss": 2.2953,
"step": 20
},
{
"epoch": 0.020202020202020204,
"grad_norm": 16.47330093383789,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.8692,
"step": 30
},
{
"epoch": 0.026936026936026935,
"grad_norm": 10.106539726257324,
"learning_rate": 3.7e-05,
"loss": 1.5765,
"step": 40
},
{
"epoch": 0.03367003367003367,
"grad_norm": 15.11670970916748,
"learning_rate": 4.7e-05,
"loss": 1.4941,
"step": 50
},
{
"epoch": 0.04040404040404041,
"grad_norm": 15.619561195373535,
"learning_rate": 4.992054483541431e-05,
"loss": 1.4231,
"step": 60
},
{
"epoch": 0.04713804713804714,
"grad_norm": 11.616868019104004,
"learning_rate": 4.9807037457434736e-05,
"loss": 1.384,
"step": 70
},
{
"epoch": 0.05387205387205387,
"grad_norm": 20.449440002441406,
"learning_rate": 4.969353007945517e-05,
"loss": 1.3234,
"step": 80
},
{
"epoch": 0.06060606060606061,
"grad_norm": 12.725702285766602,
"learning_rate": 4.95800227014756e-05,
"loss": 1.328,
"step": 90
},
{
"epoch": 0.06734006734006734,
"grad_norm": 15.14296817779541,
"learning_rate": 4.946651532349603e-05,
"loss": 1.3084,
"step": 100
},
{
"epoch": 0.07407407407407407,
"grad_norm": 11.94970417022705,
"learning_rate": 4.935300794551646e-05,
"loss": 1.2537,
"step": 110
},
{
"epoch": 0.08080808080808081,
"grad_norm": 8.479811668395996,
"learning_rate": 4.92395005675369e-05,
"loss": 1.2109,
"step": 120
},
{
"epoch": 0.08754208754208755,
"grad_norm": 14.986007690429688,
"learning_rate": 4.9125993189557325e-05,
"loss": 1.1808,
"step": 130
},
{
"epoch": 0.09427609427609428,
"grad_norm": 12.524420738220215,
"learning_rate": 4.901248581157776e-05,
"loss": 1.1299,
"step": 140
},
{
"epoch": 0.10101010101010101,
"grad_norm": 12.007112503051758,
"learning_rate": 4.8898978433598185e-05,
"loss": 1.151,
"step": 150
},
{
"epoch": 0.10774410774410774,
"grad_norm": 11.51995849609375,
"learning_rate": 4.878547105561862e-05,
"loss": 1.0585,
"step": 160
},
{
"epoch": 0.11447811447811448,
"grad_norm": 9.454591751098633,
"learning_rate": 4.8671963677639046e-05,
"loss": 0.9909,
"step": 170
},
{
"epoch": 0.12121212121212122,
"grad_norm": 9.037362098693848,
"learning_rate": 4.855845629965948e-05,
"loss": 0.9491,
"step": 180
},
{
"epoch": 0.12794612794612795,
"grad_norm": 11.153435707092285,
"learning_rate": 4.844494892167991e-05,
"loss": 0.9348,
"step": 190
},
{
"epoch": 0.13468013468013468,
"grad_norm": 10.488895416259766,
"learning_rate": 4.833144154370035e-05,
"loss": 0.8737,
"step": 200
},
{
"epoch": 0.1414141414141414,
"grad_norm": 7.803547382354736,
"learning_rate": 4.8217934165720774e-05,
"loss": 0.7626,
"step": 210
},
{
"epoch": 0.14814814814814814,
"grad_norm": 8.341965675354004,
"learning_rate": 4.810442678774121e-05,
"loss": 0.7547,
"step": 220
},
{
"epoch": 0.15488215488215487,
"grad_norm": 8.866105079650879,
"learning_rate": 4.7990919409761635e-05,
"loss": 0.6779,
"step": 230
},
{
"epoch": 0.16161616161616163,
"grad_norm": 11.127110481262207,
"learning_rate": 4.787741203178207e-05,
"loss": 0.7238,
"step": 240
},
{
"epoch": 0.16835016835016836,
"grad_norm": 10.66511344909668,
"learning_rate": 4.77639046538025e-05,
"loss": 0.6632,
"step": 250
},
{
"epoch": 0.1750841750841751,
"grad_norm": 9.223587989807129,
"learning_rate": 4.7650397275822936e-05,
"loss": 0.6478,
"step": 260
},
{
"epoch": 0.18181818181818182,
"grad_norm": 8.034420013427734,
"learning_rate": 4.753688989784336e-05,
"loss": 0.6227,
"step": 270
},
{
"epoch": 0.18855218855218855,
"grad_norm": 8.181520462036133,
"learning_rate": 4.7423382519863796e-05,
"loss": 0.6241,
"step": 280
},
{
"epoch": 0.19528619528619529,
"grad_norm": 8.540548324584961,
"learning_rate": 4.730987514188422e-05,
"loss": 0.5726,
"step": 290
},
{
"epoch": 0.20202020202020202,
"grad_norm": 10.086724281311035,
"learning_rate": 4.719636776390466e-05,
"loss": 0.5918,
"step": 300
},
{
"epoch": 0.20875420875420875,
"grad_norm": 6.169092178344727,
"learning_rate": 4.708286038592509e-05,
"loss": 0.5442,
"step": 310
},
{
"epoch": 0.21548821548821548,
"grad_norm": 8.274078369140625,
"learning_rate": 4.6969353007945524e-05,
"loss": 0.5636,
"step": 320
},
{
"epoch": 0.2222222222222222,
"grad_norm": 7.014498710632324,
"learning_rate": 4.685584562996595e-05,
"loss": 0.5154,
"step": 330
},
{
"epoch": 0.22895622895622897,
"grad_norm": 7.540900707244873,
"learning_rate": 4.6742338251986385e-05,
"loss": 0.5323,
"step": 340
},
{
"epoch": 0.2356902356902357,
"grad_norm": 9.698654174804688,
"learning_rate": 4.662883087400681e-05,
"loss": 0.519,
"step": 350
},
{
"epoch": 0.24242424242424243,
"grad_norm": 5.288636207580566,
"learning_rate": 4.6515323496027245e-05,
"loss": 0.4927,
"step": 360
},
{
"epoch": 0.24915824915824916,
"grad_norm": 6.129817485809326,
"learning_rate": 4.640181611804767e-05,
"loss": 0.5281,
"step": 370
},
{
"epoch": 0.2558922558922559,
"grad_norm": 8.628268241882324,
"learning_rate": 4.6288308740068106e-05,
"loss": 0.4962,
"step": 380
},
{
"epoch": 0.26262626262626265,
"grad_norm": 7.04541015625,
"learning_rate": 4.617480136208854e-05,
"loss": 0.4874,
"step": 390
},
{
"epoch": 0.26936026936026936,
"grad_norm": 6.490813732147217,
"learning_rate": 4.606129398410897e-05,
"loss": 0.4962,
"step": 400
},
{
"epoch": 0.26936026936026936,
"eval_loss": 0.5467123985290527,
"eval_runtime": 806.5112,
"eval_samples_per_second": 1.758,
"eval_steps_per_second": 0.11,
"eval_wer": 0.5883444691907943,
"step": 400
},
{
"epoch": 0.2760942760942761,
"grad_norm": 6.944798946380615,
"learning_rate": 4.59477866061294e-05,
"loss": 0.473,
"step": 410
},
{
"epoch": 0.2828282828282828,
"grad_norm": 6.29971981048584,
"learning_rate": 4.5834279228149834e-05,
"loss": 0.4672,
"step": 420
},
{
"epoch": 0.2895622895622896,
"grad_norm": 6.314589500427246,
"learning_rate": 4.572077185017026e-05,
"loss": 0.4657,
"step": 430
},
{
"epoch": 0.2962962962962963,
"grad_norm": 5.14242696762085,
"learning_rate": 4.5607264472190694e-05,
"loss": 0.4613,
"step": 440
},
{
"epoch": 0.30303030303030304,
"grad_norm": 6.764094352722168,
"learning_rate": 4.549375709421113e-05,
"loss": 0.4798,
"step": 450
},
{
"epoch": 0.30976430976430974,
"grad_norm": 7.024701118469238,
"learning_rate": 4.538024971623156e-05,
"loss": 0.4509,
"step": 460
},
{
"epoch": 0.3164983164983165,
"grad_norm": 4.914060592651367,
"learning_rate": 4.526674233825199e-05,
"loss": 0.4442,
"step": 470
},
{
"epoch": 0.32323232323232326,
"grad_norm": 5.153116703033447,
"learning_rate": 4.515323496027242e-05,
"loss": 0.4368,
"step": 480
},
{
"epoch": 0.32996632996632996,
"grad_norm": 6.269533157348633,
"learning_rate": 4.503972758229285e-05,
"loss": 0.4467,
"step": 490
},
{
"epoch": 0.3367003367003367,
"grad_norm": 6.478705406188965,
"learning_rate": 4.492622020431328e-05,
"loss": 0.4453,
"step": 500
},
{
"epoch": 0.3434343434343434,
"grad_norm": 5.625921249389648,
"learning_rate": 4.481271282633372e-05,
"loss": 0.3846,
"step": 510
},
{
"epoch": 0.3501683501683502,
"grad_norm": 5.453153133392334,
"learning_rate": 4.469920544835415e-05,
"loss": 0.3811,
"step": 520
},
{
"epoch": 0.3569023569023569,
"grad_norm": 6.992231369018555,
"learning_rate": 4.458569807037458e-05,
"loss": 0.4318,
"step": 530
},
{
"epoch": 0.36363636363636365,
"grad_norm": 5.616722583770752,
"learning_rate": 4.447219069239501e-05,
"loss": 0.3774,
"step": 540
},
{
"epoch": 0.37037037037037035,
"grad_norm": 4.499491214752197,
"learning_rate": 4.435868331441544e-05,
"loss": 0.4302,
"step": 550
},
{
"epoch": 0.3771043771043771,
"grad_norm": 5.019254207611084,
"learning_rate": 4.424517593643587e-05,
"loss": 0.405,
"step": 560
},
{
"epoch": 0.3838383838383838,
"grad_norm": 4.1655144691467285,
"learning_rate": 4.41316685584563e-05,
"loss": 0.3785,
"step": 570
},
{
"epoch": 0.39057239057239057,
"grad_norm": 4.204577922821045,
"learning_rate": 4.401816118047674e-05,
"loss": 0.3917,
"step": 580
},
{
"epoch": 0.39730639730639733,
"grad_norm": 5.208505153656006,
"learning_rate": 4.3904653802497166e-05,
"loss": 0.4067,
"step": 590
},
{
"epoch": 0.40404040404040403,
"grad_norm": 5.3686418533325195,
"learning_rate": 4.37911464245176e-05,
"loss": 0.3595,
"step": 600
},
{
"epoch": 0.4107744107744108,
"grad_norm": 4.19749641418457,
"learning_rate": 4.3677639046538026e-05,
"loss": 0.4,
"step": 610
},
{
"epoch": 0.4175084175084175,
"grad_norm": 7.22583532333374,
"learning_rate": 4.356413166855846e-05,
"loss": 0.3517,
"step": 620
},
{
"epoch": 0.42424242424242425,
"grad_norm": 5.825573921203613,
"learning_rate": 4.345062429057889e-05,
"loss": 0.4059,
"step": 630
},
{
"epoch": 0.43097643097643096,
"grad_norm": 5.724638938903809,
"learning_rate": 4.333711691259932e-05,
"loss": 0.3847,
"step": 640
},
{
"epoch": 0.4377104377104377,
"grad_norm": 7.2732954025268555,
"learning_rate": 4.3223609534619754e-05,
"loss": 0.3559,
"step": 650
},
{
"epoch": 0.4444444444444444,
"grad_norm": 5.488597393035889,
"learning_rate": 4.311010215664019e-05,
"loss": 0.3833,
"step": 660
},
{
"epoch": 0.4511784511784512,
"grad_norm": 6.655267715454102,
"learning_rate": 4.2996594778660615e-05,
"loss": 0.3468,
"step": 670
},
{
"epoch": 0.45791245791245794,
"grad_norm": 6.509310245513916,
"learning_rate": 4.288308740068105e-05,
"loss": 0.3865,
"step": 680
},
{
"epoch": 0.46464646464646464,
"grad_norm": 4.374180793762207,
"learning_rate": 4.2769580022701476e-05,
"loss": 0.3617,
"step": 690
},
{
"epoch": 0.4713804713804714,
"grad_norm": 3.9861130714416504,
"learning_rate": 4.265607264472191e-05,
"loss": 0.3495,
"step": 700
},
{
"epoch": 0.4781144781144781,
"grad_norm": 4.877681732177734,
"learning_rate": 4.254256526674234e-05,
"loss": 0.3662,
"step": 710
},
{
"epoch": 0.48484848484848486,
"grad_norm": 4.442035675048828,
"learning_rate": 4.242905788876278e-05,
"loss": 0.3842,
"step": 720
},
{
"epoch": 0.49158249158249157,
"grad_norm": 4.463146209716797,
"learning_rate": 4.2315550510783204e-05,
"loss": 0.3786,
"step": 730
},
{
"epoch": 0.4983164983164983,
"grad_norm": 5.221556186676025,
"learning_rate": 4.220204313280364e-05,
"loss": 0.3684,
"step": 740
},
{
"epoch": 0.5050505050505051,
"grad_norm": 5.097838878631592,
"learning_rate": 4.2088535754824064e-05,
"loss": 0.3175,
"step": 750
},
{
"epoch": 0.5117845117845118,
"grad_norm": 4.413094997406006,
"learning_rate": 4.19750283768445e-05,
"loss": 0.3759,
"step": 760
},
{
"epoch": 0.5185185185185185,
"grad_norm": 4.710460662841797,
"learning_rate": 4.186152099886493e-05,
"loss": 0.3746,
"step": 770
},
{
"epoch": 0.5252525252525253,
"grad_norm": 4.644078254699707,
"learning_rate": 4.1748013620885365e-05,
"loss": 0.3613,
"step": 780
},
{
"epoch": 0.531986531986532,
"grad_norm": 3.328462839126587,
"learning_rate": 4.163450624290579e-05,
"loss": 0.3326,
"step": 790
},
{
"epoch": 0.5387205387205387,
"grad_norm": 4.52667236328125,
"learning_rate": 4.1520998864926226e-05,
"loss": 0.3349,
"step": 800
},
{
"epoch": 0.5387205387205387,
"eval_loss": 0.49282562732696533,
"eval_runtime": 1123.4733,
"eval_samples_per_second": 1.262,
"eval_steps_per_second": 0.079,
"eval_wer": 0.5612186625549654,
"step": 800
},
{
"epoch": 0.5454545454545454,
"grad_norm": 4.9449262619018555,
"learning_rate": 4.140749148694665e-05,
"loss": 0.3366,
"step": 810
},
{
"epoch": 0.5521885521885522,
"grad_norm": 5.562499523162842,
"learning_rate": 4.1293984108967086e-05,
"loss": 0.3781,
"step": 820
},
{
"epoch": 0.5589225589225589,
"grad_norm": 4.4809250831604,
"learning_rate": 4.118047673098751e-05,
"loss": 0.3607,
"step": 830
},
{
"epoch": 0.5656565656565656,
"grad_norm": 4.371147155761719,
"learning_rate": 4.106696935300795e-05,
"loss": 0.3082,
"step": 840
},
{
"epoch": 0.5723905723905723,
"grad_norm": 5.5584893226623535,
"learning_rate": 4.095346197502838e-05,
"loss": 0.3384,
"step": 850
},
{
"epoch": 0.5791245791245792,
"grad_norm": 4.966277599334717,
"learning_rate": 4.0839954597048814e-05,
"loss": 0.3328,
"step": 860
},
{
"epoch": 0.5858585858585859,
"grad_norm": 3.8009321689605713,
"learning_rate": 4.072644721906924e-05,
"loss": 0.3201,
"step": 870
},
{
"epoch": 0.5925925925925926,
"grad_norm": 5.3277106285095215,
"learning_rate": 4.0612939841089675e-05,
"loss": 0.3345,
"step": 880
},
{
"epoch": 0.5993265993265994,
"grad_norm": 4.464631080627441,
"learning_rate": 4.04994324631101e-05,
"loss": 0.3081,
"step": 890
},
{
"epoch": 0.6060606060606061,
"grad_norm": 4.56332540512085,
"learning_rate": 4.0385925085130536e-05,
"loss": 0.3643,
"step": 900
},
{
"epoch": 0.6127946127946128,
"grad_norm": 4.806687831878662,
"learning_rate": 4.027241770715097e-05,
"loss": 0.3662,
"step": 910
},
{
"epoch": 0.6195286195286195,
"grad_norm": 5.562252998352051,
"learning_rate": 4.01589103291714e-05,
"loss": 0.3436,
"step": 920
},
{
"epoch": 0.6262626262626263,
"grad_norm": 4.167670249938965,
"learning_rate": 4.004540295119183e-05,
"loss": 0.3238,
"step": 930
},
{
"epoch": 0.632996632996633,
"grad_norm": 4.628272533416748,
"learning_rate": 3.993189557321226e-05,
"loss": 0.3351,
"step": 940
},
{
"epoch": 0.6397306397306397,
"grad_norm": 6.1379828453063965,
"learning_rate": 3.981838819523269e-05,
"loss": 0.3199,
"step": 950
},
{
"epoch": 0.6464646464646465,
"grad_norm": 4.196822643280029,
"learning_rate": 3.970488081725312e-05,
"loss": 0.3056,
"step": 960
},
{
"epoch": 0.6531986531986532,
"grad_norm": 5.613431930541992,
"learning_rate": 3.959137343927356e-05,
"loss": 0.3292,
"step": 970
},
{
"epoch": 0.6599326599326599,
"grad_norm": 4.543855667114258,
"learning_rate": 3.9477866061293985e-05,
"loss": 0.3001,
"step": 980
},
{
"epoch": 0.6666666666666666,
"grad_norm": 3.637694835662842,
"learning_rate": 3.936435868331442e-05,
"loss": 0.2602,
"step": 990
},
{
"epoch": 0.6734006734006734,
"grad_norm": 6.640063285827637,
"learning_rate": 3.9250851305334845e-05,
"loss": 0.3195,
"step": 1000
},
{
"epoch": 0.6801346801346801,
"grad_norm": 4.616398334503174,
"learning_rate": 3.913734392735528e-05,
"loss": 0.3157,
"step": 1010
},
{
"epoch": 0.6868686868686869,
"grad_norm": 5.17544412612915,
"learning_rate": 3.9023836549375706e-05,
"loss": 0.3079,
"step": 1020
},
{
"epoch": 0.6936026936026936,
"grad_norm": 5.558164596557617,
"learning_rate": 3.891032917139614e-05,
"loss": 0.3093,
"step": 1030
},
{
"epoch": 0.7003367003367004,
"grad_norm": 4.862564563751221,
"learning_rate": 3.879682179341657e-05,
"loss": 0.3255,
"step": 1040
},
{
"epoch": 0.7070707070707071,
"grad_norm": 3.790825605392456,
"learning_rate": 3.868331441543701e-05,
"loss": 0.3331,
"step": 1050
},
{
"epoch": 0.7138047138047138,
"grad_norm": 4.081621170043945,
"learning_rate": 3.8569807037457434e-05,
"loss": 0.275,
"step": 1060
},
{
"epoch": 0.7205387205387206,
"grad_norm": 3.9744527339935303,
"learning_rate": 3.845629965947787e-05,
"loss": 0.2978,
"step": 1070
},
{
"epoch": 0.7272727272727273,
"grad_norm": 4.056180953979492,
"learning_rate": 3.8342792281498294e-05,
"loss": 0.2912,
"step": 1080
},
{
"epoch": 0.734006734006734,
"grad_norm": 5.50215482711792,
"learning_rate": 3.822928490351873e-05,
"loss": 0.2738,
"step": 1090
},
{
"epoch": 0.7407407407407407,
"grad_norm": 6.039124488830566,
"learning_rate": 3.811577752553916e-05,
"loss": 0.2912,
"step": 1100
},
{
"epoch": 0.7474747474747475,
"grad_norm": 4.475265979766846,
"learning_rate": 3.8002270147559595e-05,
"loss": 0.3059,
"step": 1110
},
{
"epoch": 0.7542087542087542,
"grad_norm": 3.8151988983154297,
"learning_rate": 3.788876276958002e-05,
"loss": 0.2865,
"step": 1120
},
{
"epoch": 0.7609427609427609,
"grad_norm": 4.704629898071289,
"learning_rate": 3.7775255391600456e-05,
"loss": 0.2938,
"step": 1130
},
{
"epoch": 0.7676767676767676,
"grad_norm": 4.103381633758545,
"learning_rate": 3.766174801362088e-05,
"loss": 0.277,
"step": 1140
},
{
"epoch": 0.7744107744107744,
"grad_norm": 5.573786735534668,
"learning_rate": 3.754824063564132e-05,
"loss": 0.3232,
"step": 1150
},
{
"epoch": 0.7811447811447811,
"grad_norm": 3.373387575149536,
"learning_rate": 3.743473325766175e-05,
"loss": 0.3285,
"step": 1160
},
{
"epoch": 0.7878787878787878,
"grad_norm": 3.7531933784484863,
"learning_rate": 3.7321225879682184e-05,
"loss": 0.2596,
"step": 1170
},
{
"epoch": 0.7946127946127947,
"grad_norm": 5.006664752960205,
"learning_rate": 3.720771850170261e-05,
"loss": 0.2919,
"step": 1180
},
{
"epoch": 0.8013468013468014,
"grad_norm": 6.7509307861328125,
"learning_rate": 3.7094211123723045e-05,
"loss": 0.2947,
"step": 1190
},
{
"epoch": 0.8080808080808081,
"grad_norm": 3.8846304416656494,
"learning_rate": 3.698070374574347e-05,
"loss": 0.2906,
"step": 1200
},
{
"epoch": 0.8080808080808081,
"eval_loss": 0.4824906289577484,
"eval_runtime": 688.719,
"eval_samples_per_second": 2.059,
"eval_steps_per_second": 0.129,
"eval_wer": 0.5195876877391354,
"step": 1200
},
{
"epoch": 0.8148148148148148,
"grad_norm": 4.751924514770508,
"learning_rate": 3.6867196367763905e-05,
"loss": 0.2785,
"step": 1210
},
{
"epoch": 0.8215488215488216,
"grad_norm": 3.7664754390716553,
"learning_rate": 3.675368898978433e-05,
"loss": 0.3289,
"step": 1220
},
{
"epoch": 0.8282828282828283,
"grad_norm": 5.232487201690674,
"learning_rate": 3.6640181611804766e-05,
"loss": 0.3174,
"step": 1230
},
{
"epoch": 0.835016835016835,
"grad_norm": 2.880322217941284,
"learning_rate": 3.65266742338252e-05,
"loss": 0.2686,
"step": 1240
},
{
"epoch": 0.8417508417508418,
"grad_norm": 4.3191609382629395,
"learning_rate": 3.641316685584563e-05,
"loss": 0.3059,
"step": 1250
},
{
"epoch": 0.8484848484848485,
"grad_norm": 3.35304594039917,
"learning_rate": 3.629965947786606e-05,
"loss": 0.2693,
"step": 1260
},
{
"epoch": 0.8552188552188552,
"grad_norm": 3.989720582962036,
"learning_rate": 3.6186152099886494e-05,
"loss": 0.2748,
"step": 1270
},
{
"epoch": 0.8619528619528619,
"grad_norm": 2.795743942260742,
"learning_rate": 3.607264472190692e-05,
"loss": 0.2608,
"step": 1280
},
{
"epoch": 0.8686868686868687,
"grad_norm": 5.1076226234436035,
"learning_rate": 3.5959137343927354e-05,
"loss": 0.2979,
"step": 1290
},
{
"epoch": 0.8754208754208754,
"grad_norm": 3.131528854370117,
"learning_rate": 3.584562996594779e-05,
"loss": 0.2865,
"step": 1300
},
{
"epoch": 0.8821548821548821,
"grad_norm": 6.658942699432373,
"learning_rate": 3.573212258796822e-05,
"loss": 0.2727,
"step": 1310
},
{
"epoch": 0.8888888888888888,
"grad_norm": 4.044928073883057,
"learning_rate": 3.561861520998865e-05,
"loss": 0.2913,
"step": 1320
},
{
"epoch": 0.8956228956228957,
"grad_norm": 4.87237024307251,
"learning_rate": 3.550510783200908e-05,
"loss": 0.2485,
"step": 1330
},
{
"epoch": 0.9023569023569024,
"grad_norm": 3.8377342224121094,
"learning_rate": 3.539160045402951e-05,
"loss": 0.2707,
"step": 1340
},
{
"epoch": 0.9090909090909091,
"grad_norm": 3.848212957382202,
"learning_rate": 3.527809307604994e-05,
"loss": 0.2375,
"step": 1350
},
{
"epoch": 0.9158249158249159,
"grad_norm": 3.686363697052002,
"learning_rate": 3.5164585698070377e-05,
"loss": 0.2597,
"step": 1360
},
{
"epoch": 0.9225589225589226,
"grad_norm": 4.444821834564209,
"learning_rate": 3.505107832009081e-05,
"loss": 0.2599,
"step": 1370
},
{
"epoch": 0.9292929292929293,
"grad_norm": 4.101839542388916,
"learning_rate": 3.493757094211124e-05,
"loss": 0.297,
"step": 1380
},
{
"epoch": 0.936026936026936,
"grad_norm": 4.912603855133057,
"learning_rate": 3.482406356413167e-05,
"loss": 0.2557,
"step": 1390
},
{
"epoch": 0.9427609427609428,
"grad_norm": 4.1229248046875,
"learning_rate": 3.47105561861521e-05,
"loss": 0.271,
"step": 1400
},
{
"epoch": 0.9494949494949495,
"grad_norm": 3.668956756591797,
"learning_rate": 3.459704880817253e-05,
"loss": 0.286,
"step": 1410
},
{
"epoch": 0.9562289562289562,
"grad_norm": 5.052644729614258,
"learning_rate": 3.448354143019296e-05,
"loss": 0.2841,
"step": 1420
},
{
"epoch": 0.9629629629629629,
"grad_norm": 3.9958231449127197,
"learning_rate": 3.43700340522134e-05,
"loss": 0.2473,
"step": 1430
},
{
"epoch": 0.9696969696969697,
"grad_norm": 4.527822971343994,
"learning_rate": 3.4256526674233826e-05,
"loss": 0.2867,
"step": 1440
},
{
"epoch": 0.9764309764309764,
"grad_norm": 3.6779627799987793,
"learning_rate": 3.414301929625426e-05,
"loss": 0.2547,
"step": 1450
},
{
"epoch": 0.9831649831649831,
"grad_norm": 4.0302581787109375,
"learning_rate": 3.4029511918274686e-05,
"loss": 0.2697,
"step": 1460
},
{
"epoch": 0.98989898989899,
"grad_norm": 3.447392463684082,
"learning_rate": 3.391600454029512e-05,
"loss": 0.2809,
"step": 1470
},
{
"epoch": 0.9966329966329966,
"grad_norm": 3.6442668437957764,
"learning_rate": 3.380249716231555e-05,
"loss": 0.2705,
"step": 1480
},
{
"epoch": 1.0033670033670035,
"grad_norm": 3.358112335205078,
"learning_rate": 3.368898978433598e-05,
"loss": 0.2206,
"step": 1490
},
{
"epoch": 1.0101010101010102,
"grad_norm": 2.5414748191833496,
"learning_rate": 3.3575482406356414e-05,
"loss": 0.22,
"step": 1500
},
{
"epoch": 1.0168350168350169,
"grad_norm": 3.4823262691497803,
"learning_rate": 3.346197502837685e-05,
"loss": 0.1841,
"step": 1510
},
{
"epoch": 1.0235690235690236,
"grad_norm": 3.468315362930298,
"learning_rate": 3.3348467650397275e-05,
"loss": 0.2136,
"step": 1520
},
{
"epoch": 1.0303030303030303,
"grad_norm": 3.683201551437378,
"learning_rate": 3.323496027241771e-05,
"loss": 0.1949,
"step": 1530
},
{
"epoch": 1.037037037037037,
"grad_norm": 2.5732924938201904,
"learning_rate": 3.3121452894438135e-05,
"loss": 0.2017,
"step": 1540
},
{
"epoch": 1.0437710437710437,
"grad_norm": 4.662359714508057,
"learning_rate": 3.300794551645857e-05,
"loss": 0.224,
"step": 1550
},
{
"epoch": 1.0505050505050506,
"grad_norm": 3.25209379196167,
"learning_rate": 3.2894438138479e-05,
"loss": 0.1729,
"step": 1560
},
{
"epoch": 1.0572390572390573,
"grad_norm": 3.497758388519287,
"learning_rate": 3.2780930760499436e-05,
"loss": 0.2176,
"step": 1570
},
{
"epoch": 1.063973063973064,
"grad_norm": 3.802095890045166,
"learning_rate": 3.2667423382519863e-05,
"loss": 0.1802,
"step": 1580
},
{
"epoch": 1.0707070707070707,
"grad_norm": 3.487844705581665,
"learning_rate": 3.25539160045403e-05,
"loss": 0.2105,
"step": 1590
},
{
"epoch": 1.0774410774410774,
"grad_norm": 2.5473880767822266,
"learning_rate": 3.2440408626560724e-05,
"loss": 0.1848,
"step": 1600
},
{
"epoch": 1.0774410774410774,
"eval_loss": 0.46825486421585083,
"eval_runtime": 776.6351,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.115,
"eval_wer": 0.502912455028268,
"step": 1600
},
{
"epoch": 1.0841750841750841,
"grad_norm": 3.390531063079834,
"learning_rate": 3.232690124858116e-05,
"loss": 0.205,
"step": 1610
},
{
"epoch": 1.0909090909090908,
"grad_norm": 3.4687283039093018,
"learning_rate": 3.221339387060159e-05,
"loss": 0.1893,
"step": 1620
},
{
"epoch": 1.0976430976430978,
"grad_norm": 4.269082069396973,
"learning_rate": 3.2099886492622025e-05,
"loss": 0.1824,
"step": 1630
},
{
"epoch": 1.1043771043771045,
"grad_norm": 4.165797233581543,
"learning_rate": 3.198637911464245e-05,
"loss": 0.1989,
"step": 1640
},
{
"epoch": 1.1111111111111112,
"grad_norm": 3.5516631603240967,
"learning_rate": 3.1872871736662886e-05,
"loss": 0.1872,
"step": 1650
},
{
"epoch": 1.1178451178451179,
"grad_norm": 3.6036102771759033,
"learning_rate": 3.175936435868331e-05,
"loss": 0.2143,
"step": 1660
},
{
"epoch": 1.1245791245791246,
"grad_norm": 3.6270053386688232,
"learning_rate": 3.1645856980703746e-05,
"loss": 0.1806,
"step": 1670
},
{
"epoch": 1.1313131313131313,
"grad_norm": 3.623009204864502,
"learning_rate": 3.153234960272417e-05,
"loss": 0.1964,
"step": 1680
},
{
"epoch": 1.138047138047138,
"grad_norm": 3.477240800857544,
"learning_rate": 3.141884222474461e-05,
"loss": 0.2126,
"step": 1690
},
{
"epoch": 1.144781144781145,
"grad_norm": 4.374250411987305,
"learning_rate": 3.130533484676504e-05,
"loss": 0.2001,
"step": 1700
},
{
"epoch": 1.1515151515151516,
"grad_norm": 3.302889347076416,
"learning_rate": 3.1191827468785474e-05,
"loss": 0.1732,
"step": 1710
},
{
"epoch": 1.1582491582491583,
"grad_norm": 3.2889134883880615,
"learning_rate": 3.10783200908059e-05,
"loss": 0.2031,
"step": 1720
},
{
"epoch": 1.164983164983165,
"grad_norm": 3.4270858764648438,
"learning_rate": 3.0964812712826335e-05,
"loss": 0.1947,
"step": 1730
},
{
"epoch": 1.1717171717171717,
"grad_norm": 2.7580225467681885,
"learning_rate": 3.085130533484676e-05,
"loss": 0.1871,
"step": 1740
},
{
"epoch": 1.1784511784511784,
"grad_norm": 3.1256375312805176,
"learning_rate": 3.0737797956867195e-05,
"loss": 0.2017,
"step": 1750
},
{
"epoch": 1.1851851851851851,
"grad_norm": 2.583787441253662,
"learning_rate": 3.062429057888763e-05,
"loss": 0.2098,
"step": 1760
},
{
"epoch": 1.1919191919191918,
"grad_norm": 3.7047979831695557,
"learning_rate": 3.051078320090806e-05,
"loss": 0.2023,
"step": 1770
},
{
"epoch": 1.1986531986531987,
"grad_norm": 2.448273181915283,
"learning_rate": 3.039727582292849e-05,
"loss": 0.1836,
"step": 1780
},
{
"epoch": 1.2053872053872055,
"grad_norm": 2.4795892238616943,
"learning_rate": 3.0283768444948923e-05,
"loss": 0.1904,
"step": 1790
},
{
"epoch": 1.2121212121212122,
"grad_norm": 4.302123546600342,
"learning_rate": 3.0170261066969354e-05,
"loss": 0.1787,
"step": 1800
},
{
"epoch": 1.2188552188552189,
"grad_norm": 3.4514520168304443,
"learning_rate": 3.0056753688989787e-05,
"loss": 0.1779,
"step": 1810
},
{
"epoch": 1.2255892255892256,
"grad_norm": 4.459456443786621,
"learning_rate": 2.9943246311010214e-05,
"loss": 0.1958,
"step": 1820
},
{
"epoch": 1.2323232323232323,
"grad_norm": 2.8016040325164795,
"learning_rate": 2.9829738933030648e-05,
"loss": 0.2076,
"step": 1830
},
{
"epoch": 1.239057239057239,
"grad_norm": 2.360806941986084,
"learning_rate": 2.9716231555051078e-05,
"loss": 0.1778,
"step": 1840
},
{
"epoch": 1.2457912457912457,
"grad_norm": 3.3838906288146973,
"learning_rate": 2.9602724177071512e-05,
"loss": 0.2019,
"step": 1850
},
{
"epoch": 1.2525252525252526,
"grad_norm": 3.9138290882110596,
"learning_rate": 2.948921679909194e-05,
"loss": 0.1981,
"step": 1860
},
{
"epoch": 1.2592592592592593,
"grad_norm": 3.0798556804656982,
"learning_rate": 2.9375709421112372e-05,
"loss": 0.1718,
"step": 1870
},
{
"epoch": 1.265993265993266,
"grad_norm": 5.293337345123291,
"learning_rate": 2.9262202043132803e-05,
"loss": 0.1801,
"step": 1880
},
{
"epoch": 1.2727272727272727,
"grad_norm": 3.36914324760437,
"learning_rate": 2.9148694665153236e-05,
"loss": 0.2143,
"step": 1890
},
{
"epoch": 1.2794612794612794,
"grad_norm": 3.070244550704956,
"learning_rate": 2.9035187287173667e-05,
"loss": 0.1844,
"step": 1900
},
{
"epoch": 1.2861952861952861,
"grad_norm": 2.8654966354370117,
"learning_rate": 2.89216799091941e-05,
"loss": 0.1774,
"step": 1910
},
{
"epoch": 1.2929292929292928,
"grad_norm": 3.248065710067749,
"learning_rate": 2.8808172531214527e-05,
"loss": 0.1649,
"step": 1920
},
{
"epoch": 1.2996632996632997,
"grad_norm": 3.4403655529022217,
"learning_rate": 2.869466515323496e-05,
"loss": 0.1858,
"step": 1930
},
{
"epoch": 1.3063973063973064,
"grad_norm": 2.928788661956787,
"learning_rate": 2.858115777525539e-05,
"loss": 0.1986,
"step": 1940
},
{
"epoch": 1.3131313131313131,
"grad_norm": 2.984104871749878,
"learning_rate": 2.8467650397275825e-05,
"loss": 0.1874,
"step": 1950
},
{
"epoch": 1.3198653198653199,
"grad_norm": 3.16933536529541,
"learning_rate": 2.8354143019296252e-05,
"loss": 0.1954,
"step": 1960
},
{
"epoch": 1.3265993265993266,
"grad_norm": 3.3342158794403076,
"learning_rate": 2.8240635641316686e-05,
"loss": 0.1654,
"step": 1970
},
{
"epoch": 1.3333333333333333,
"grad_norm": 3.3244802951812744,
"learning_rate": 2.8127128263337116e-05,
"loss": 0.1461,
"step": 1980
},
{
"epoch": 1.34006734006734,
"grad_norm": 3.566857099533081,
"learning_rate": 2.801362088535755e-05,
"loss": 0.1732,
"step": 1990
},
{
"epoch": 1.3468013468013469,
"grad_norm": 2.2419066429138184,
"learning_rate": 2.790011350737798e-05,
"loss": 0.1612,
"step": 2000
},
{
"epoch": 1.3468013468013469,
"eval_loss": 0.470431387424469,
"eval_runtime": 778.9928,
"eval_samples_per_second": 1.82,
"eval_steps_per_second": 0.114,
"eval_wer": 0.49309005767803094,
"step": 2000
},
{
"epoch": 1.3535353535353536,
"grad_norm": 3.476229429244995,
"learning_rate": 2.7786606129398414e-05,
"loss": 0.1884,
"step": 2010
},
{
"epoch": 1.3602693602693603,
"grad_norm": 2.509948968887329,
"learning_rate": 2.767309875141884e-05,
"loss": 0.1647,
"step": 2020
},
{
"epoch": 1.367003367003367,
"grad_norm": 3.446333408355713,
"learning_rate": 2.7559591373439274e-05,
"loss": 0.1882,
"step": 2030
},
{
"epoch": 1.3737373737373737,
"grad_norm": 4.690558910369873,
"learning_rate": 2.7446083995459704e-05,
"loss": 0.1772,
"step": 2040
},
{
"epoch": 1.3804713804713804,
"grad_norm": 3.1924571990966797,
"learning_rate": 2.7332576617480138e-05,
"loss": 0.1598,
"step": 2050
},
{
"epoch": 1.387205387205387,
"grad_norm": 3.3819077014923096,
"learning_rate": 2.7219069239500565e-05,
"loss": 0.1778,
"step": 2060
},
{
"epoch": 1.393939393939394,
"grad_norm": 3.4989449977874756,
"learning_rate": 2.7105561861521002e-05,
"loss": 0.1613,
"step": 2070
},
{
"epoch": 1.4006734006734007,
"grad_norm": 3.041142225265503,
"learning_rate": 2.699205448354143e-05,
"loss": 0.1602,
"step": 2080
},
{
"epoch": 1.4074074074074074,
"grad_norm": 2.791797399520874,
"learning_rate": 2.6878547105561863e-05,
"loss": 0.1847,
"step": 2090
},
{
"epoch": 1.4141414141414141,
"grad_norm": 3.252044916152954,
"learning_rate": 2.6765039727582293e-05,
"loss": 0.1687,
"step": 2100
},
{
"epoch": 1.4208754208754208,
"grad_norm": 4.116684436798096,
"learning_rate": 2.6651532349602727e-05,
"loss": 0.1839,
"step": 2110
},
{
"epoch": 1.4276094276094276,
"grad_norm": 2.723188638687134,
"learning_rate": 2.6538024971623154e-05,
"loss": 0.1746,
"step": 2120
},
{
"epoch": 1.4343434343434343,
"grad_norm": 2.7226133346557617,
"learning_rate": 2.6424517593643587e-05,
"loss": 0.1621,
"step": 2130
},
{
"epoch": 1.4410774410774412,
"grad_norm": 4.077718734741211,
"learning_rate": 2.6311010215664018e-05,
"loss": 0.1765,
"step": 2140
},
{
"epoch": 1.4478114478114479,
"grad_norm": 3.0060672760009766,
"learning_rate": 2.619750283768445e-05,
"loss": 0.1658,
"step": 2150
},
{
"epoch": 1.4545454545454546,
"grad_norm": 3.187156915664673,
"learning_rate": 2.6083995459704878e-05,
"loss": 0.1722,
"step": 2160
},
{
"epoch": 1.4612794612794613,
"grad_norm": 2.6665992736816406,
"learning_rate": 2.5970488081725315e-05,
"loss": 0.1879,
"step": 2170
},
{
"epoch": 1.468013468013468,
"grad_norm": 3.637021541595459,
"learning_rate": 2.5856980703745742e-05,
"loss": 0.1657,
"step": 2180
},
{
"epoch": 1.4747474747474747,
"grad_norm": 3.3992161750793457,
"learning_rate": 2.5743473325766176e-05,
"loss": 0.2039,
"step": 2190
},
{
"epoch": 1.4814814814814814,
"grad_norm": 4.711835861206055,
"learning_rate": 2.5629965947786606e-05,
"loss": 0.1943,
"step": 2200
},
{
"epoch": 1.4882154882154883,
"grad_norm": 2.712679624557495,
"learning_rate": 2.551645856980704e-05,
"loss": 0.1671,
"step": 2210
},
{
"epoch": 1.494949494949495,
"grad_norm": 3.3363306522369385,
"learning_rate": 2.5402951191827467e-05,
"loss": 0.1669,
"step": 2220
},
{
"epoch": 1.5016835016835017,
"grad_norm": 3.961500406265259,
"learning_rate": 2.52894438138479e-05,
"loss": 0.1478,
"step": 2230
},
{
"epoch": 1.5084175084175084,
"grad_norm": 3.390343189239502,
"learning_rate": 2.517593643586833e-05,
"loss": 0.1744,
"step": 2240
},
{
"epoch": 1.5151515151515151,
"grad_norm": 3.3122527599334717,
"learning_rate": 2.5062429057888764e-05,
"loss": 0.182,
"step": 2250
},
{
"epoch": 1.5218855218855218,
"grad_norm": 3.9705302715301514,
"learning_rate": 2.4948921679909195e-05,
"loss": 0.1545,
"step": 2260
},
{
"epoch": 1.5286195286195285,
"grad_norm": 3.5639703273773193,
"learning_rate": 2.483541430192963e-05,
"loss": 0.1643,
"step": 2270
},
{
"epoch": 1.5353535353535355,
"grad_norm": 3.0987420082092285,
"learning_rate": 2.472190692395006e-05,
"loss": 0.1845,
"step": 2280
},
{
"epoch": 1.542087542087542,
"grad_norm": 3.1441290378570557,
"learning_rate": 2.460839954597049e-05,
"loss": 0.1515,
"step": 2290
},
{
"epoch": 1.5488215488215489,
"grad_norm": 3.7302119731903076,
"learning_rate": 2.4494892167990923e-05,
"loss": 0.1838,
"step": 2300
},
{
"epoch": 1.5555555555555556,
"grad_norm": 2.877547025680542,
"learning_rate": 2.4381384790011353e-05,
"loss": 0.1837,
"step": 2310
},
{
"epoch": 1.5622895622895623,
"grad_norm": 3.0840272903442383,
"learning_rate": 2.4267877412031783e-05,
"loss": 0.17,
"step": 2320
},
{
"epoch": 1.569023569023569,
"grad_norm": 2.3135063648223877,
"learning_rate": 2.4154370034052214e-05,
"loss": 0.1524,
"step": 2330
},
{
"epoch": 1.5757575757575757,
"grad_norm": 5.435102939605713,
"learning_rate": 2.4040862656072647e-05,
"loss": 0.1631,
"step": 2340
},
{
"epoch": 1.5824915824915826,
"grad_norm": 2.6250736713409424,
"learning_rate": 2.3927355278093077e-05,
"loss": 0.1748,
"step": 2350
},
{
"epoch": 1.589225589225589,
"grad_norm": 3.478433132171631,
"learning_rate": 2.3813847900113508e-05,
"loss": 0.1557,
"step": 2360
},
{
"epoch": 1.595959595959596,
"grad_norm": 2.924372673034668,
"learning_rate": 2.370034052213394e-05,
"loss": 0.1647,
"step": 2370
},
{
"epoch": 1.6026936026936027,
"grad_norm": 2.708841562271118,
"learning_rate": 2.3586833144154372e-05,
"loss": 0.2072,
"step": 2380
},
{
"epoch": 1.6094276094276094,
"grad_norm": 3.2418808937072754,
"learning_rate": 2.3473325766174802e-05,
"loss": 0.1705,
"step": 2390
},
{
"epoch": 1.6161616161616161,
"grad_norm": 2.908341884613037,
"learning_rate": 2.3359818388195236e-05,
"loss": 0.192,
"step": 2400
},
{
"epoch": 1.6161616161616161,
"eval_loss": 0.4698619246482849,
"eval_runtime": 721.5021,
"eval_samples_per_second": 1.965,
"eval_steps_per_second": 0.123,
"eval_wer": 0.49100565358917253,
"step": 2400
},
{
"epoch": 1.6228956228956228,
"grad_norm": 2.884115695953369,
"learning_rate": 2.3246311010215666e-05,
"loss": 0.17,
"step": 2410
},
{
"epoch": 1.6296296296296298,
"grad_norm": 3.3428990840911865,
"learning_rate": 2.3132803632236096e-05,
"loss": 0.1576,
"step": 2420
},
{
"epoch": 1.6363636363636362,
"grad_norm": 2.6130261421203613,
"learning_rate": 2.3019296254256527e-05,
"loss": 0.161,
"step": 2430
},
{
"epoch": 1.6430976430976432,
"grad_norm": 2.56199049949646,
"learning_rate": 2.290578887627696e-05,
"loss": 0.1741,
"step": 2440
},
{
"epoch": 1.6498316498316499,
"grad_norm": 3.0649795532226562,
"learning_rate": 2.279228149829739e-05,
"loss": 0.1662,
"step": 2450
},
{
"epoch": 1.6565656565656566,
"grad_norm": 2.2965986728668213,
"learning_rate": 2.267877412031782e-05,
"loss": 0.1717,
"step": 2460
},
{
"epoch": 1.6632996632996633,
"grad_norm": 3.121750831604004,
"learning_rate": 2.2565266742338255e-05,
"loss": 0.1577,
"step": 2470
},
{
"epoch": 1.67003367003367,
"grad_norm": 2.052502393722534,
"learning_rate": 2.2451759364358685e-05,
"loss": 0.1568,
"step": 2480
},
{
"epoch": 1.676767676767677,
"grad_norm": 2.4969053268432617,
"learning_rate": 2.2338251986379115e-05,
"loss": 0.153,
"step": 2490
},
{
"epoch": 1.6835016835016834,
"grad_norm": 2.811131715774536,
"learning_rate": 2.222474460839955e-05,
"loss": 0.1527,
"step": 2500
},
{
"epoch": 1.6902356902356903,
"grad_norm": 2.959965229034424,
"learning_rate": 2.211123723041998e-05,
"loss": 0.1501,
"step": 2510
},
{
"epoch": 1.696969696969697,
"grad_norm": 3.3598415851593018,
"learning_rate": 2.199772985244041e-05,
"loss": 0.1702,
"step": 2520
},
{
"epoch": 1.7037037037037037,
"grad_norm": 3.3592233657836914,
"learning_rate": 2.1884222474460843e-05,
"loss": 0.1584,
"step": 2530
},
{
"epoch": 1.7104377104377104,
"grad_norm": 3.0574469566345215,
"learning_rate": 2.1770715096481273e-05,
"loss": 0.1513,
"step": 2540
},
{
"epoch": 1.7171717171717171,
"grad_norm": 2.782938003540039,
"learning_rate": 2.1657207718501704e-05,
"loss": 0.1646,
"step": 2550
},
{
"epoch": 1.723905723905724,
"grad_norm": 2.9138362407684326,
"learning_rate": 2.1543700340522134e-05,
"loss": 0.1513,
"step": 2560
},
{
"epoch": 1.7306397306397305,
"grad_norm": 2.8213393688201904,
"learning_rate": 2.1430192962542568e-05,
"loss": 0.1683,
"step": 2570
},
{
"epoch": 1.7373737373737375,
"grad_norm": 3.486140489578247,
"learning_rate": 2.1316685584562998e-05,
"loss": 0.1713,
"step": 2580
},
{
"epoch": 1.7441077441077442,
"grad_norm": 2.8046581745147705,
"learning_rate": 2.1203178206583428e-05,
"loss": 0.1804,
"step": 2590
},
{
"epoch": 1.7508417508417509,
"grad_norm": 2.6458210945129395,
"learning_rate": 2.1089670828603862e-05,
"loss": 0.165,
"step": 2600
},
{
"epoch": 1.7575757575757576,
"grad_norm": 2.7271742820739746,
"learning_rate": 2.0976163450624292e-05,
"loss": 0.165,
"step": 2610
},
{
"epoch": 1.7643097643097643,
"grad_norm": 3.7697384357452393,
"learning_rate": 2.0862656072644723e-05,
"loss": 0.1944,
"step": 2620
},
{
"epoch": 1.7710437710437712,
"grad_norm": 2.6092400550842285,
"learning_rate": 2.0749148694665156e-05,
"loss": 0.1712,
"step": 2630
},
{
"epoch": 1.7777777777777777,
"grad_norm": 3.9321539402008057,
"learning_rate": 2.0635641316685587e-05,
"loss": 0.1718,
"step": 2640
},
{
"epoch": 1.7845117845117846,
"grad_norm": 3.0893261432647705,
"learning_rate": 2.0522133938706017e-05,
"loss": 0.1802,
"step": 2650
},
{
"epoch": 1.791245791245791,
"grad_norm": 3.8314249515533447,
"learning_rate": 2.0408626560726447e-05,
"loss": 0.1746,
"step": 2660
},
{
"epoch": 1.797979797979798,
"grad_norm": 3.2088515758514404,
"learning_rate": 2.029511918274688e-05,
"loss": 0.1693,
"step": 2670
},
{
"epoch": 1.8047138047138047,
"grad_norm": 3.136512517929077,
"learning_rate": 2.018161180476731e-05,
"loss": 0.1773,
"step": 2680
},
{
"epoch": 1.8114478114478114,
"grad_norm": 2.799889326095581,
"learning_rate": 2.006810442678774e-05,
"loss": 0.146,
"step": 2690
},
{
"epoch": 1.8181818181818183,
"grad_norm": 2.3367459774017334,
"learning_rate": 1.9954597048808175e-05,
"loss": 0.1561,
"step": 2700
},
{
"epoch": 1.8249158249158248,
"grad_norm": 3.626417636871338,
"learning_rate": 1.9841089670828605e-05,
"loss": 0.1532,
"step": 2710
},
{
"epoch": 1.8316498316498318,
"grad_norm": 3.480536460876465,
"learning_rate": 1.9727582292849036e-05,
"loss": 0.1493,
"step": 2720
},
{
"epoch": 1.8383838383838382,
"grad_norm": 2.8837146759033203,
"learning_rate": 1.961407491486947e-05,
"loss": 0.1475,
"step": 2730
},
{
"epoch": 1.8451178451178452,
"grad_norm": 2.784156322479248,
"learning_rate": 1.95005675368899e-05,
"loss": 0.1698,
"step": 2740
},
{
"epoch": 1.8518518518518519,
"grad_norm": 2.8038928508758545,
"learning_rate": 1.938706015891033e-05,
"loss": 0.1686,
"step": 2750
},
{
"epoch": 1.8585858585858586,
"grad_norm": 2.904350996017456,
"learning_rate": 1.9273552780930764e-05,
"loss": 0.1554,
"step": 2760
},
{
"epoch": 1.8653198653198653,
"grad_norm": 2.736264705657959,
"learning_rate": 1.9160045402951194e-05,
"loss": 0.1562,
"step": 2770
},
{
"epoch": 1.872053872053872,
"grad_norm": 3.001835584640503,
"learning_rate": 1.9046538024971624e-05,
"loss": 0.1567,
"step": 2780
},
{
"epoch": 1.878787878787879,
"grad_norm": 2.6082592010498047,
"learning_rate": 1.8933030646992055e-05,
"loss": 0.1573,
"step": 2790
},
{
"epoch": 1.8855218855218854,
"grad_norm": 3.1785757541656494,
"learning_rate": 1.8819523269012488e-05,
"loss": 0.1528,
"step": 2800
},
{
"epoch": 1.8855218855218854,
"eval_loss": 0.46750280261039734,
"eval_runtime": 783.2295,
"eval_samples_per_second": 1.81,
"eval_steps_per_second": 0.114,
"eval_wer": 0.48695105933413285,
"step": 2800
},
{
"epoch": 1.8922558922558923,
"grad_norm": 3.254110336303711,
"learning_rate": 1.870601589103292e-05,
"loss": 0.1492,
"step": 2810
},
{
"epoch": 1.898989898989899,
"grad_norm": 3.617150068283081,
"learning_rate": 1.859250851305335e-05,
"loss": 0.1524,
"step": 2820
},
{
"epoch": 1.9057239057239057,
"grad_norm": 2.7314984798431396,
"learning_rate": 1.8479001135073783e-05,
"loss": 0.1824,
"step": 2830
},
{
"epoch": 1.9124579124579124,
"grad_norm": 3.677401304244995,
"learning_rate": 1.8365493757094213e-05,
"loss": 0.1675,
"step": 2840
},
{
"epoch": 1.9191919191919191,
"grad_norm": 3.4799599647521973,
"learning_rate": 1.8251986379114643e-05,
"loss": 0.1936,
"step": 2850
},
{
"epoch": 1.925925925925926,
"grad_norm": 2.47420072555542,
"learning_rate": 1.8138479001135077e-05,
"loss": 0.1624,
"step": 2860
},
{
"epoch": 1.9326599326599325,
"grad_norm": 3.2847509384155273,
"learning_rate": 1.8024971623155507e-05,
"loss": 0.1867,
"step": 2870
},
{
"epoch": 1.9393939393939394,
"grad_norm": 2.4963037967681885,
"learning_rate": 1.7911464245175937e-05,
"loss": 0.1498,
"step": 2880
},
{
"epoch": 1.9461279461279462,
"grad_norm": 4.231179714202881,
"learning_rate": 1.7797956867196368e-05,
"loss": 0.1569,
"step": 2890
},
{
"epoch": 1.9528619528619529,
"grad_norm": 3.305777072906494,
"learning_rate": 1.76844494892168e-05,
"loss": 0.1647,
"step": 2900
},
{
"epoch": 1.9595959595959596,
"grad_norm": 2.858846664428711,
"learning_rate": 1.757094211123723e-05,
"loss": 0.1668,
"step": 2910
},
{
"epoch": 1.9663299663299663,
"grad_norm": 2.4449424743652344,
"learning_rate": 1.7457434733257662e-05,
"loss": 0.1506,
"step": 2920
},
{
"epoch": 1.9730639730639732,
"grad_norm": 2.5614805221557617,
"learning_rate": 1.7343927355278096e-05,
"loss": 0.1821,
"step": 2930
},
{
"epoch": 1.9797979797979797,
"grad_norm": 3.1182758808135986,
"learning_rate": 1.7230419977298526e-05,
"loss": 0.1709,
"step": 2940
},
{
"epoch": 1.9865319865319866,
"grad_norm": 3.463992118835449,
"learning_rate": 1.7116912599318956e-05,
"loss": 0.1479,
"step": 2950
},
{
"epoch": 1.9932659932659933,
"grad_norm": 2.1584393978118896,
"learning_rate": 1.700340522133939e-05,
"loss": 0.1431,
"step": 2960
},
{
"epoch": 2.0,
"grad_norm": 3.5880393981933594,
"learning_rate": 1.688989784335982e-05,
"loss": 0.1478,
"step": 2970
},
{
"epoch": 2.006734006734007,
"grad_norm": 1.8367834091186523,
"learning_rate": 1.677639046538025e-05,
"loss": 0.0986,
"step": 2980
},
{
"epoch": 2.0134680134680134,
"grad_norm": 2.266422748565674,
"learning_rate": 1.6662883087400684e-05,
"loss": 0.1031,
"step": 2990
},
{
"epoch": 2.0202020202020203,
"grad_norm": 2.440058708190918,
"learning_rate": 1.6549375709421114e-05,
"loss": 0.0995,
"step": 3000
},
{
"epoch": 2.026936026936027,
"grad_norm": 1.5215619802474976,
"learning_rate": 1.6435868331441545e-05,
"loss": 0.0937,
"step": 3010
},
{
"epoch": 2.0336700336700337,
"grad_norm": 3.0853044986724854,
"learning_rate": 1.6322360953461975e-05,
"loss": 0.1028,
"step": 3020
},
{
"epoch": 2.04040404040404,
"grad_norm": 2.2898178100585938,
"learning_rate": 1.620885357548241e-05,
"loss": 0.0971,
"step": 3030
},
{
"epoch": 2.047138047138047,
"grad_norm": 2.6617209911346436,
"learning_rate": 1.609534619750284e-05,
"loss": 0.1043,
"step": 3040
},
{
"epoch": 2.053872053872054,
"grad_norm": 3.225191593170166,
"learning_rate": 1.598183881952327e-05,
"loss": 0.0998,
"step": 3050
},
{
"epoch": 2.0606060606060606,
"grad_norm": 2.3820834159851074,
"learning_rate": 1.5868331441543703e-05,
"loss": 0.083,
"step": 3060
},
{
"epoch": 2.0673400673400675,
"grad_norm": 3.0194029808044434,
"learning_rate": 1.5754824063564133e-05,
"loss": 0.0909,
"step": 3070
},
{
"epoch": 2.074074074074074,
"grad_norm": 1.5243077278137207,
"learning_rate": 1.5641316685584564e-05,
"loss": 0.0877,
"step": 3080
},
{
"epoch": 2.080808080808081,
"grad_norm": 2.7908105850219727,
"learning_rate": 1.5527809307604997e-05,
"loss": 0.1003,
"step": 3090
},
{
"epoch": 2.0875420875420874,
"grad_norm": 2.368906259536743,
"learning_rate": 1.5414301929625428e-05,
"loss": 0.1013,
"step": 3100
},
{
"epoch": 2.0942760942760943,
"grad_norm": 1.6835886240005493,
"learning_rate": 1.5300794551645858e-05,
"loss": 0.1012,
"step": 3110
},
{
"epoch": 2.101010101010101,
"grad_norm": 2.943992853164673,
"learning_rate": 1.518728717366629e-05,
"loss": 0.0949,
"step": 3120
},
{
"epoch": 2.1077441077441077,
"grad_norm": 2.4449052810668945,
"learning_rate": 1.5073779795686722e-05,
"loss": 0.0967,
"step": 3130
},
{
"epoch": 2.1144781144781146,
"grad_norm": 2.521737813949585,
"learning_rate": 1.4960272417707152e-05,
"loss": 0.0933,
"step": 3140
},
{
"epoch": 2.121212121212121,
"grad_norm": 2.7859129905700684,
"learning_rate": 1.4846765039727584e-05,
"loss": 0.1091,
"step": 3150
},
{
"epoch": 2.127946127946128,
"grad_norm": 2.2307798862457275,
"learning_rate": 1.4733257661748014e-05,
"loss": 0.091,
"step": 3160
},
{
"epoch": 2.1346801346801345,
"grad_norm": 3.108671188354492,
"learning_rate": 1.4619750283768446e-05,
"loss": 0.1147,
"step": 3170
},
{
"epoch": 2.1414141414141414,
"grad_norm": 2.4862091541290283,
"learning_rate": 1.4506242905788878e-05,
"loss": 0.0951,
"step": 3180
},
{
"epoch": 2.148148148148148,
"grad_norm": 1.7988865375518799,
"learning_rate": 1.4392735527809309e-05,
"loss": 0.0963,
"step": 3190
},
{
"epoch": 2.154882154882155,
"grad_norm": 2.6203229427337646,
"learning_rate": 1.427922814982974e-05,
"loss": 0.0999,
"step": 3200
},
{
"epoch": 2.154882154882155,
"eval_loss": 0.47007495164871216,
"eval_runtime": 773.3558,
"eval_samples_per_second": 1.834,
"eval_steps_per_second": 0.115,
"eval_wer": 0.4653360744674776,
"step": 3200
},
{
"epoch": 2.1616161616161618,
"grad_norm": 3.4655826091766357,
"learning_rate": 1.4165720771850171e-05,
"loss": 0.1302,
"step": 3210
},
{
"epoch": 2.1683501683501682,
"grad_norm": 2.2620227336883545,
"learning_rate": 1.4052213393870603e-05,
"loss": 0.0917,
"step": 3220
},
{
"epoch": 2.175084175084175,
"grad_norm": 3.4524097442626953,
"learning_rate": 1.3938706015891035e-05,
"loss": 0.0977,
"step": 3230
},
{
"epoch": 2.1818181818181817,
"grad_norm": 3.0727145671844482,
"learning_rate": 1.3825198637911465e-05,
"loss": 0.0995,
"step": 3240
},
{
"epoch": 2.1885521885521886,
"grad_norm": 2.59820818901062,
"learning_rate": 1.3711691259931897e-05,
"loss": 0.0965,
"step": 3250
},
{
"epoch": 2.1952861952861955,
"grad_norm": 1.8692411184310913,
"learning_rate": 1.3598183881952328e-05,
"loss": 0.0973,
"step": 3260
},
{
"epoch": 2.202020202020202,
"grad_norm": 1.9435840845108032,
"learning_rate": 1.348467650397276e-05,
"loss": 0.1007,
"step": 3270
},
{
"epoch": 2.208754208754209,
"grad_norm": 3.7439959049224854,
"learning_rate": 1.3371169125993192e-05,
"loss": 0.0929,
"step": 3280
},
{
"epoch": 2.2154882154882154,
"grad_norm": 3.1171443462371826,
"learning_rate": 1.3257661748013622e-05,
"loss": 0.1129,
"step": 3290
},
{
"epoch": 2.2222222222222223,
"grad_norm": 3.671785593032837,
"learning_rate": 1.3144154370034054e-05,
"loss": 0.1068,
"step": 3300
},
{
"epoch": 2.228956228956229,
"grad_norm": 2.3654842376708984,
"learning_rate": 1.3030646992054484e-05,
"loss": 0.0851,
"step": 3310
},
{
"epoch": 2.2356902356902357,
"grad_norm": 3.868271589279175,
"learning_rate": 1.2917139614074916e-05,
"loss": 0.1121,
"step": 3320
},
{
"epoch": 2.242424242424242,
"grad_norm": 2.7278647422790527,
"learning_rate": 1.2803632236095348e-05,
"loss": 0.1068,
"step": 3330
},
{
"epoch": 2.249158249158249,
"grad_norm": 2.541274309158325,
"learning_rate": 1.2690124858115778e-05,
"loss": 0.1005,
"step": 3340
},
{
"epoch": 2.255892255892256,
"grad_norm": 2.2592976093292236,
"learning_rate": 1.257661748013621e-05,
"loss": 0.1014,
"step": 3350
},
{
"epoch": 2.2626262626262625,
"grad_norm": 1.714357614517212,
"learning_rate": 1.246311010215664e-05,
"loss": 0.0775,
"step": 3360
},
{
"epoch": 2.2693602693602695,
"grad_norm": 3.3454010486602783,
"learning_rate": 1.2349602724177071e-05,
"loss": 0.1026,
"step": 3370
},
{
"epoch": 2.276094276094276,
"grad_norm": 3.0652363300323486,
"learning_rate": 1.2236095346197503e-05,
"loss": 0.1227,
"step": 3380
},
{
"epoch": 2.282828282828283,
"grad_norm": 2.409959077835083,
"learning_rate": 1.2122587968217935e-05,
"loss": 0.1115,
"step": 3390
},
{
"epoch": 2.28956228956229,
"grad_norm": 3.0419325828552246,
"learning_rate": 1.2009080590238365e-05,
"loss": 0.0942,
"step": 3400
},
{
"epoch": 2.2962962962962963,
"grad_norm": 2.3572564125061035,
"learning_rate": 1.1895573212258797e-05,
"loss": 0.1034,
"step": 3410
},
{
"epoch": 2.303030303030303,
"grad_norm": 2.0597918033599854,
"learning_rate": 1.1782065834279228e-05,
"loss": 0.1169,
"step": 3420
},
{
"epoch": 2.3097643097643097,
"grad_norm": 1.6198811531066895,
"learning_rate": 1.166855845629966e-05,
"loss": 0.1073,
"step": 3430
},
{
"epoch": 2.3164983164983166,
"grad_norm": 2.385390520095825,
"learning_rate": 1.1555051078320092e-05,
"loss": 0.0913,
"step": 3440
},
{
"epoch": 2.323232323232323,
"grad_norm": 1.6714180707931519,
"learning_rate": 1.1441543700340522e-05,
"loss": 0.0964,
"step": 3450
},
{
"epoch": 2.32996632996633,
"grad_norm": 2.2347018718719482,
"learning_rate": 1.1328036322360954e-05,
"loss": 0.0948,
"step": 3460
},
{
"epoch": 2.3367003367003365,
"grad_norm": 1.7842698097229004,
"learning_rate": 1.1214528944381384e-05,
"loss": 0.0933,
"step": 3470
},
{
"epoch": 2.3434343434343434,
"grad_norm": 2.054187059402466,
"learning_rate": 1.1101021566401816e-05,
"loss": 0.0967,
"step": 3480
},
{
"epoch": 2.3501683501683504,
"grad_norm": 2.3955607414245605,
"learning_rate": 1.0987514188422248e-05,
"loss": 0.0789,
"step": 3490
},
{
"epoch": 2.356902356902357,
"grad_norm": 2.6920056343078613,
"learning_rate": 1.0874006810442678e-05,
"loss": 0.1126,
"step": 3500
},
{
"epoch": 2.3636363636363638,
"grad_norm": 2.0969793796539307,
"learning_rate": 1.076049943246311e-05,
"loss": 0.0909,
"step": 3510
},
{
"epoch": 2.3703703703703702,
"grad_norm": 2.8712689876556396,
"learning_rate": 1.064699205448354e-05,
"loss": 0.0948,
"step": 3520
},
{
"epoch": 2.377104377104377,
"grad_norm": 3.084336519241333,
"learning_rate": 1.0533484676503973e-05,
"loss": 0.1052,
"step": 3530
},
{
"epoch": 2.3838383838383836,
"grad_norm": 2.8842592239379883,
"learning_rate": 1.0419977298524405e-05,
"loss": 0.1051,
"step": 3540
},
{
"epoch": 2.3905723905723906,
"grad_norm": 1.8973740339279175,
"learning_rate": 1.0306469920544835e-05,
"loss": 0.1224,
"step": 3550
},
{
"epoch": 2.3973063973063975,
"grad_norm": 2.898562431335449,
"learning_rate": 1.0192962542565267e-05,
"loss": 0.117,
"step": 3560
},
{
"epoch": 2.404040404040404,
"grad_norm": 2.5222558975219727,
"learning_rate": 1.0079455164585697e-05,
"loss": 0.0975,
"step": 3570
},
{
"epoch": 2.410774410774411,
"grad_norm": 2.629905939102173,
"learning_rate": 9.96594778660613e-06,
"loss": 0.116,
"step": 3580
},
{
"epoch": 2.4175084175084174,
"grad_norm": 2.554290294647217,
"learning_rate": 9.852440408626561e-06,
"loss": 0.112,
"step": 3590
},
{
"epoch": 2.4242424242424243,
"grad_norm": 1.7490330934524536,
"learning_rate": 9.738933030646992e-06,
"loss": 0.088,
"step": 3600
},
{
"epoch": 2.4242424242424243,
"eval_loss": 0.4696303904056549,
"eval_runtime": 674.2707,
"eval_samples_per_second": 2.103,
"eval_steps_per_second": 0.132,
"eval_wer": 0.46716349723031236,
"step": 3600
},
{
"epoch": 2.430976430976431,
"grad_norm": 3.1101365089416504,
"learning_rate": 9.625425652667424e-06,
"loss": 0.0916,
"step": 3610
},
{
"epoch": 2.4377104377104377,
"grad_norm": 4.431212425231934,
"learning_rate": 9.511918274687854e-06,
"loss": 0.119,
"step": 3620
},
{
"epoch": 2.4444444444444446,
"grad_norm": 2.283841848373413,
"learning_rate": 9.398410896708286e-06,
"loss": 0.0921,
"step": 3630
},
{
"epoch": 2.451178451178451,
"grad_norm": 2.228675127029419,
"learning_rate": 9.284903518728718e-06,
"loss": 0.0835,
"step": 3640
},
{
"epoch": 2.457912457912458,
"grad_norm": 2.3716728687286377,
"learning_rate": 9.171396140749148e-06,
"loss": 0.0908,
"step": 3650
},
{
"epoch": 2.4646464646464645,
"grad_norm": 2.604325532913208,
"learning_rate": 9.05788876276958e-06,
"loss": 0.0971,
"step": 3660
},
{
"epoch": 2.4713804713804715,
"grad_norm": 2.1539206504821777,
"learning_rate": 8.944381384790012e-06,
"loss": 0.0815,
"step": 3670
},
{
"epoch": 2.478114478114478,
"grad_norm": 2.422910213470459,
"learning_rate": 8.830874006810442e-06,
"loss": 0.1085,
"step": 3680
},
{
"epoch": 2.484848484848485,
"grad_norm": 2.368211030960083,
"learning_rate": 8.717366628830874e-06,
"loss": 0.0947,
"step": 3690
},
{
"epoch": 2.4915824915824913,
"grad_norm": 1.828069806098938,
"learning_rate": 8.603859250851305e-06,
"loss": 0.0867,
"step": 3700
},
{
"epoch": 2.4983164983164983,
"grad_norm": 2.33329176902771,
"learning_rate": 8.490351872871737e-06,
"loss": 0.0941,
"step": 3710
},
{
"epoch": 2.505050505050505,
"grad_norm": 2.341047525405884,
"learning_rate": 8.376844494892169e-06,
"loss": 0.0915,
"step": 3720
},
{
"epoch": 2.5117845117845117,
"grad_norm": 1.9225627183914185,
"learning_rate": 8.263337116912599e-06,
"loss": 0.1044,
"step": 3730
},
{
"epoch": 2.5185185185185186,
"grad_norm": 2.387437105178833,
"learning_rate": 8.149829738933031e-06,
"loss": 0.0987,
"step": 3740
},
{
"epoch": 2.525252525252525,
"grad_norm": 2.9379942417144775,
"learning_rate": 8.036322360953461e-06,
"loss": 0.0871,
"step": 3750
},
{
"epoch": 2.531986531986532,
"grad_norm": 3.075242042541504,
"learning_rate": 7.922814982973893e-06,
"loss": 0.0962,
"step": 3760
},
{
"epoch": 2.538720538720539,
"grad_norm": 3.6734471321105957,
"learning_rate": 7.809307604994325e-06,
"loss": 0.0844,
"step": 3770
},
{
"epoch": 2.5454545454545454,
"grad_norm": 2.5898001194000244,
"learning_rate": 7.695800227014755e-06,
"loss": 0.0996,
"step": 3780
},
{
"epoch": 2.5521885521885523,
"grad_norm": 2.4215145111083984,
"learning_rate": 7.5822928490351875e-06,
"loss": 0.0857,
"step": 3790
},
{
"epoch": 2.558922558922559,
"grad_norm": 3.2795231342315674,
"learning_rate": 7.468785471055619e-06,
"loss": 0.1002,
"step": 3800
},
{
"epoch": 2.5656565656565657,
"grad_norm": 3.356985092163086,
"learning_rate": 7.35527809307605e-06,
"loss": 0.0956,
"step": 3810
},
{
"epoch": 2.5723905723905722,
"grad_norm": 1.5472785234451294,
"learning_rate": 7.241770715096481e-06,
"loss": 0.0937,
"step": 3820
},
{
"epoch": 2.579124579124579,
"grad_norm": 3.1027777194976807,
"learning_rate": 7.128263337116913e-06,
"loss": 0.1001,
"step": 3830
},
{
"epoch": 2.5858585858585856,
"grad_norm": 2.8028059005737305,
"learning_rate": 7.014755959137344e-06,
"loss": 0.1012,
"step": 3840
},
{
"epoch": 2.5925925925925926,
"grad_norm": 3.476177930831909,
"learning_rate": 6.901248581157775e-06,
"loss": 0.0843,
"step": 3850
},
{
"epoch": 2.5993265993265995,
"grad_norm": 1.6291272640228271,
"learning_rate": 6.787741203178206e-06,
"loss": 0.1141,
"step": 3860
},
{
"epoch": 2.606060606060606,
"grad_norm": 2.611839532852173,
"learning_rate": 6.6742338251986375e-06,
"loss": 0.0842,
"step": 3870
},
{
"epoch": 2.612794612794613,
"grad_norm": 2.290695905685425,
"learning_rate": 6.5607264472190694e-06,
"loss": 0.1037,
"step": 3880
},
{
"epoch": 2.6195286195286194,
"grad_norm": 2.7317962646484375,
"learning_rate": 6.447219069239501e-06,
"loss": 0.0967,
"step": 3890
},
{
"epoch": 2.6262626262626263,
"grad_norm": 2.8561346530914307,
"learning_rate": 6.333711691259932e-06,
"loss": 0.1026,
"step": 3900
},
{
"epoch": 2.6329966329966332,
"grad_norm": 1.468044638633728,
"learning_rate": 6.220204313280364e-06,
"loss": 0.0962,
"step": 3910
},
{
"epoch": 2.6397306397306397,
"grad_norm": 2.2081100940704346,
"learning_rate": 6.106696935300795e-06,
"loss": 0.1087,
"step": 3920
},
{
"epoch": 2.6464646464646466,
"grad_norm": 1.8171058893203735,
"learning_rate": 5.993189557321226e-06,
"loss": 0.1031,
"step": 3930
},
{
"epoch": 2.653198653198653,
"grad_norm": 2.296617269515991,
"learning_rate": 5.879682179341658e-06,
"loss": 0.1066,
"step": 3940
},
{
"epoch": 2.65993265993266,
"grad_norm": 2.02673077583313,
"learning_rate": 5.766174801362089e-06,
"loss": 0.0939,
"step": 3950
},
{
"epoch": 2.6666666666666665,
"grad_norm": 2.2023749351501465,
"learning_rate": 5.65266742338252e-06,
"loss": 0.1186,
"step": 3960
},
{
"epoch": 2.6734006734006734,
"grad_norm": 2.8352410793304443,
"learning_rate": 5.539160045402951e-06,
"loss": 0.0982,
"step": 3970
},
{
"epoch": 2.68013468013468,
"grad_norm": 2.6541831493377686,
"learning_rate": 5.425652667423383e-06,
"loss": 0.1005,
"step": 3980
},
{
"epoch": 2.686868686868687,
"grad_norm": 2.7797365188598633,
"learning_rate": 5.3121452894438146e-06,
"loss": 0.0946,
"step": 3990
},
{
"epoch": 2.6936026936026938,
"grad_norm": 1.8120551109313965,
"learning_rate": 5.198637911464246e-06,
"loss": 0.1003,
"step": 4000
},
{
"epoch": 2.6936026936026938,
"eval_loss": 0.4680774211883545,
"eval_runtime": 686.7326,
"eval_samples_per_second": 2.065,
"eval_steps_per_second": 0.13,
"eval_wer": 0.46013934098566617,
"step": 4000
},
{
"epoch": 2.7003367003367003,
"grad_norm": 2.1801674365997314,
"learning_rate": 5.085130533484677e-06,
"loss": 0.087,
"step": 4010
},
{
"epoch": 2.707070707070707,
"grad_norm": 2.256625175476074,
"learning_rate": 4.971623155505108e-06,
"loss": 0.0806,
"step": 4020
},
{
"epoch": 2.7138047138047137,
"grad_norm": 2.6446785926818848,
"learning_rate": 4.85811577752554e-06,
"loss": 0.1,
"step": 4030
},
{
"epoch": 2.7205387205387206,
"grad_norm": 1.7377904653549194,
"learning_rate": 4.744608399545971e-06,
"loss": 0.0923,
"step": 4040
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.9209539890289307,
"learning_rate": 4.631101021566402e-06,
"loss": 0.0992,
"step": 4050
},
{
"epoch": 2.734006734006734,
"grad_norm": 2.6267309188842773,
"learning_rate": 4.517593643586833e-06,
"loss": 0.0822,
"step": 4060
},
{
"epoch": 2.7407407407407405,
"grad_norm": 1.8967944383621216,
"learning_rate": 4.404086265607265e-06,
"loss": 0.0807,
"step": 4070
},
{
"epoch": 2.7474747474747474,
"grad_norm": 2.2259716987609863,
"learning_rate": 4.2905788876276965e-06,
"loss": 0.0794,
"step": 4080
},
{
"epoch": 2.7542087542087543,
"grad_norm": 2.3132541179656982,
"learning_rate": 4.177071509648128e-06,
"loss": 0.0835,
"step": 4090
},
{
"epoch": 2.760942760942761,
"grad_norm": 2.2206366062164307,
"learning_rate": 4.063564131668559e-06,
"loss": 0.0827,
"step": 4100
},
{
"epoch": 2.7676767676767677,
"grad_norm": 2.6239089965820312,
"learning_rate": 3.95005675368899e-06,
"loss": 0.0946,
"step": 4110
},
{
"epoch": 2.774410774410774,
"grad_norm": 2.0979490280151367,
"learning_rate": 3.836549375709422e-06,
"loss": 0.0682,
"step": 4120
},
{
"epoch": 2.781144781144781,
"grad_norm": 2.2430787086486816,
"learning_rate": 3.723041997729853e-06,
"loss": 0.089,
"step": 4130
},
{
"epoch": 2.787878787878788,
"grad_norm": 2.0071237087249756,
"learning_rate": 3.6095346197502842e-06,
"loss": 0.0892,
"step": 4140
},
{
"epoch": 2.7946127946127945,
"grad_norm": 2.0898067951202393,
"learning_rate": 3.4960272417707154e-06,
"loss": 0.0879,
"step": 4150
},
{
"epoch": 2.8013468013468015,
"grad_norm": 3.248400926589966,
"learning_rate": 3.382519863791147e-06,
"loss": 0.0928,
"step": 4160
},
{
"epoch": 2.808080808080808,
"grad_norm": 2.3027138710021973,
"learning_rate": 3.269012485811578e-06,
"loss": 0.0876,
"step": 4170
},
{
"epoch": 2.814814814814815,
"grad_norm": 2.523341417312622,
"learning_rate": 3.1555051078320097e-06,
"loss": 0.0902,
"step": 4180
},
{
"epoch": 2.821548821548822,
"grad_norm": 1.5260744094848633,
"learning_rate": 3.0419977298524404e-06,
"loss": 0.0773,
"step": 4190
},
{
"epoch": 2.8282828282828283,
"grad_norm": 1.5680999755859375,
"learning_rate": 2.928490351872872e-06,
"loss": 0.0862,
"step": 4200
},
{
"epoch": 2.8350168350168348,
"grad_norm": 2.547013759613037,
"learning_rate": 2.814982973893303e-06,
"loss": 0.0972,
"step": 4210
},
{
"epoch": 2.8417508417508417,
"grad_norm": 2.116196870803833,
"learning_rate": 2.7014755959137347e-06,
"loss": 0.0949,
"step": 4220
},
{
"epoch": 2.8484848484848486,
"grad_norm": 2.7395036220550537,
"learning_rate": 2.587968217934166e-06,
"loss": 0.094,
"step": 4230
},
{
"epoch": 2.855218855218855,
"grad_norm": 2.0705437660217285,
"learning_rate": 2.4744608399545974e-06,
"loss": 0.0991,
"step": 4240
},
{
"epoch": 2.861952861952862,
"grad_norm": 3.2586395740509033,
"learning_rate": 2.3609534619750285e-06,
"loss": 0.0959,
"step": 4250
},
{
"epoch": 2.8686868686868685,
"grad_norm": 2.0380172729492188,
"learning_rate": 2.2474460839954596e-06,
"loss": 0.091,
"step": 4260
},
{
"epoch": 2.8754208754208754,
"grad_norm": 1.8960984945297241,
"learning_rate": 2.1339387060158912e-06,
"loss": 0.0867,
"step": 4270
},
{
"epoch": 2.8821548821548824,
"grad_norm": 2.761885643005371,
"learning_rate": 2.0204313280363224e-06,
"loss": 0.0972,
"step": 4280
},
{
"epoch": 2.888888888888889,
"grad_norm": 2.1381499767303467,
"learning_rate": 1.9069239500567537e-06,
"loss": 0.0929,
"step": 4290
},
{
"epoch": 2.8956228956228958,
"grad_norm": 1.5396257638931274,
"learning_rate": 1.793416572077185e-06,
"loss": 0.088,
"step": 4300
},
{
"epoch": 2.9023569023569022,
"grad_norm": 2.4630839824676514,
"learning_rate": 1.6799091940976164e-06,
"loss": 0.1068,
"step": 4310
},
{
"epoch": 2.909090909090909,
"grad_norm": 1.8399533033370972,
"learning_rate": 1.5664018161180478e-06,
"loss": 0.0772,
"step": 4320
},
{
"epoch": 2.915824915824916,
"grad_norm": 2.0763957500457764,
"learning_rate": 1.4528944381384791e-06,
"loss": 0.0885,
"step": 4330
},
{
"epoch": 2.9225589225589226,
"grad_norm": 5.270694255828857,
"learning_rate": 1.3393870601589105e-06,
"loss": 0.0876,
"step": 4340
},
{
"epoch": 2.929292929292929,
"grad_norm": 2.339585542678833,
"learning_rate": 1.2258796821793418e-06,
"loss": 0.0829,
"step": 4350
},
{
"epoch": 2.936026936026936,
"grad_norm": 2.7977676391601562,
"learning_rate": 1.112372304199773e-06,
"loss": 0.1098,
"step": 4360
},
{
"epoch": 2.942760942760943,
"grad_norm": 2.1591367721557617,
"learning_rate": 9.988649262202043e-07,
"loss": 0.0917,
"step": 4370
},
{
"epoch": 2.9494949494949494,
"grad_norm": 2.4336767196655273,
"learning_rate": 8.853575482406357e-07,
"loss": 0.1013,
"step": 4380
},
{
"epoch": 2.9562289562289563,
"grad_norm": 1.9739155769348145,
"learning_rate": 7.718501702610669e-07,
"loss": 0.0731,
"step": 4390
},
{
"epoch": 2.962962962962963,
"grad_norm": 2.9581665992736816,
"learning_rate": 6.583427922814983e-07,
"loss": 0.1034,
"step": 4400
},
{
"epoch": 2.962962962962963,
"eval_loss": 0.4650237560272217,
"eval_runtime": 680.1256,
"eval_samples_per_second": 2.085,
"eval_steps_per_second": 0.131,
"eval_wer": 0.4528582034149963,
"step": 4400
}
],
"logging_steps": 10,
"max_steps": 4455,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.56588428967936e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
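
The JSON above is a standard Hugging Face Trainer state file: "log_history" holds one entry per logging step (loss, learning_rate, grad_norm) and one entry per evaluation (eval_loss, eval_wer, eval_runtime), while "best_metric" and "best_model_checkpoint" record the best value of the selection metric, which here matches the eval_wer at step 4400 (0.4529). A minimal sketch of how such a file can be parsed to plot the training loss and evaluation WER, assuming Python with matplotlib installed and the file saved locally as trainer_state.json:

import json

import matplotlib.pyplot as plt

# Load the trainer state (the path is an assumption; adjust to where the file lives).
with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss"/"eval_wer".
train_steps = [e["step"] for e in state["log_history"] if "loss" in e]
train_loss = [e["loss"] for e in state["log_history"] if "loss" in e]
eval_steps = [e["step"] for e in state["log_history"] if "eval_wer" in e]
eval_wer = [e["eval_wer"] for e in state["log_history"] if "eval_wer" in e]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(train_steps, train_loss)
ax1.set_xlabel("step")
ax1.set_ylabel("training loss")
ax2.plot(eval_steps, eval_wer, marker="o")
ax2.set_xlabel("step")
ax2.set_ylabel("eval WER")
fig.tight_layout()
plt.show()

print("best metric:", state["best_metric"], "at", state["best_model_checkpoint"])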