{ "best_metric": 0.4528582034149963, "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-base-khmer\\checkpoint-4400", "epoch": 2.962962962962963, "eval_steps": 400, "global_step": 4400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006734006734006734, "grad_norm": 22.712024688720703, "learning_rate": 7.000000000000001e-06, "loss": 2.8858, "step": 10 }, { "epoch": 0.013468013468013467, "grad_norm": 17.574142456054688, "learning_rate": 1.7000000000000003e-05, "loss": 2.2953, "step": 20 }, { "epoch": 0.020202020202020204, "grad_norm": 16.47330093383789, "learning_rate": 2.7000000000000002e-05, "loss": 1.8692, "step": 30 }, { "epoch": 0.026936026936026935, "grad_norm": 10.106539726257324, "learning_rate": 3.7e-05, "loss": 1.5765, "step": 40 }, { "epoch": 0.03367003367003367, "grad_norm": 15.11670970916748, "learning_rate": 4.7e-05, "loss": 1.4941, "step": 50 }, { "epoch": 0.04040404040404041, "grad_norm": 15.619561195373535, "learning_rate": 4.992054483541431e-05, "loss": 1.4231, "step": 60 }, { "epoch": 0.04713804713804714, "grad_norm": 11.616868019104004, "learning_rate": 4.9807037457434736e-05, "loss": 1.384, "step": 70 }, { "epoch": 0.05387205387205387, "grad_norm": 20.449440002441406, "learning_rate": 4.969353007945517e-05, "loss": 1.3234, "step": 80 }, { "epoch": 0.06060606060606061, "grad_norm": 12.725702285766602, "learning_rate": 4.95800227014756e-05, "loss": 1.328, "step": 90 }, { "epoch": 0.06734006734006734, "grad_norm": 15.14296817779541, "learning_rate": 4.946651532349603e-05, "loss": 1.3084, "step": 100 }, { "epoch": 0.07407407407407407, "grad_norm": 11.94970417022705, "learning_rate": 4.935300794551646e-05, "loss": 1.2537, "step": 110 }, { "epoch": 0.08080808080808081, "grad_norm": 8.479811668395996, "learning_rate": 4.92395005675369e-05, "loss": 1.2109, "step": 120 }, { "epoch": 0.08754208754208755, "grad_norm": 14.986007690429688, "learning_rate": 4.9125993189557325e-05, "loss": 1.1808, "step": 130 }, { "epoch": 0.09427609427609428, "grad_norm": 12.524420738220215, "learning_rate": 4.901248581157776e-05, "loss": 1.1299, "step": 140 }, { "epoch": 0.10101010101010101, "grad_norm": 12.007112503051758, "learning_rate": 4.8898978433598185e-05, "loss": 1.151, "step": 150 }, { "epoch": 0.10774410774410774, "grad_norm": 11.51995849609375, "learning_rate": 4.878547105561862e-05, "loss": 1.0585, "step": 160 }, { "epoch": 0.11447811447811448, "grad_norm": 9.454591751098633, "learning_rate": 4.8671963677639046e-05, "loss": 0.9909, "step": 170 }, { "epoch": 0.12121212121212122, "grad_norm": 9.037362098693848, "learning_rate": 4.855845629965948e-05, "loss": 0.9491, "step": 180 }, { "epoch": 0.12794612794612795, "grad_norm": 11.153435707092285, "learning_rate": 4.844494892167991e-05, "loss": 0.9348, "step": 190 }, { "epoch": 0.13468013468013468, "grad_norm": 10.488895416259766, "learning_rate": 4.833144154370035e-05, "loss": 0.8737, "step": 200 }, { "epoch": 0.1414141414141414, "grad_norm": 7.803547382354736, "learning_rate": 4.8217934165720774e-05, "loss": 0.7626, "step": 210 }, { "epoch": 0.14814814814814814, "grad_norm": 8.341965675354004, "learning_rate": 4.810442678774121e-05, "loss": 0.7547, "step": 220 }, { "epoch": 0.15488215488215487, "grad_norm": 8.866105079650879, "learning_rate": 4.7990919409761635e-05, "loss": 0.6779, "step": 230 }, { "epoch": 0.16161616161616163, "grad_norm": 11.127110481262207, "learning_rate": 4.787741203178207e-05, "loss": 0.7238, "step": 
240 }, { "epoch": 0.16835016835016836, "grad_norm": 10.66511344909668, "learning_rate": 4.77639046538025e-05, "loss": 0.6632, "step": 250 }, { "epoch": 0.1750841750841751, "grad_norm": 9.223587989807129, "learning_rate": 4.7650397275822936e-05, "loss": 0.6478, "step": 260 }, { "epoch": 0.18181818181818182, "grad_norm": 8.034420013427734, "learning_rate": 4.753688989784336e-05, "loss": 0.6227, "step": 270 }, { "epoch": 0.18855218855218855, "grad_norm": 8.181520462036133, "learning_rate": 4.7423382519863796e-05, "loss": 0.6241, "step": 280 }, { "epoch": 0.19528619528619529, "grad_norm": 8.540548324584961, "learning_rate": 4.730987514188422e-05, "loss": 0.5726, "step": 290 }, { "epoch": 0.20202020202020202, "grad_norm": 10.086724281311035, "learning_rate": 4.719636776390466e-05, "loss": 0.5918, "step": 300 }, { "epoch": 0.20875420875420875, "grad_norm": 6.169092178344727, "learning_rate": 4.708286038592509e-05, "loss": 0.5442, "step": 310 }, { "epoch": 0.21548821548821548, "grad_norm": 8.274078369140625, "learning_rate": 4.6969353007945524e-05, "loss": 0.5636, "step": 320 }, { "epoch": 0.2222222222222222, "grad_norm": 7.014498710632324, "learning_rate": 4.685584562996595e-05, "loss": 0.5154, "step": 330 }, { "epoch": 0.22895622895622897, "grad_norm": 7.540900707244873, "learning_rate": 4.6742338251986385e-05, "loss": 0.5323, "step": 340 }, { "epoch": 0.2356902356902357, "grad_norm": 9.698654174804688, "learning_rate": 4.662883087400681e-05, "loss": 0.519, "step": 350 }, { "epoch": 0.24242424242424243, "grad_norm": 5.288636207580566, "learning_rate": 4.6515323496027245e-05, "loss": 0.4927, "step": 360 }, { "epoch": 0.24915824915824916, "grad_norm": 6.129817485809326, "learning_rate": 4.640181611804767e-05, "loss": 0.5281, "step": 370 }, { "epoch": 0.2558922558922559, "grad_norm": 8.628268241882324, "learning_rate": 4.6288308740068106e-05, "loss": 0.4962, "step": 380 }, { "epoch": 0.26262626262626265, "grad_norm": 7.04541015625, "learning_rate": 4.617480136208854e-05, "loss": 0.4874, "step": 390 }, { "epoch": 0.26936026936026936, "grad_norm": 6.490813732147217, "learning_rate": 4.606129398410897e-05, "loss": 0.4962, "step": 400 }, { "epoch": 0.26936026936026936, "eval_loss": 0.5467123985290527, "eval_runtime": 806.5112, "eval_samples_per_second": 1.758, "eval_steps_per_second": 0.11, "eval_wer": 0.5883444691907943, "step": 400 }, { "epoch": 0.2760942760942761, "grad_norm": 6.944798946380615, "learning_rate": 4.59477866061294e-05, "loss": 0.473, "step": 410 }, { "epoch": 0.2828282828282828, "grad_norm": 6.29971981048584, "learning_rate": 4.5834279228149834e-05, "loss": 0.4672, "step": 420 }, { "epoch": 0.2895622895622896, "grad_norm": 6.314589500427246, "learning_rate": 4.572077185017026e-05, "loss": 0.4657, "step": 430 }, { "epoch": 0.2962962962962963, "grad_norm": 5.14242696762085, "learning_rate": 4.5607264472190694e-05, "loss": 0.4613, "step": 440 }, { "epoch": 0.30303030303030304, "grad_norm": 6.764094352722168, "learning_rate": 4.549375709421113e-05, "loss": 0.4798, "step": 450 }, { "epoch": 0.30976430976430974, "grad_norm": 7.024701118469238, "learning_rate": 4.538024971623156e-05, "loss": 0.4509, "step": 460 }, { "epoch": 0.3164983164983165, "grad_norm": 4.914060592651367, "learning_rate": 4.526674233825199e-05, "loss": 0.4442, "step": 470 }, { "epoch": 0.32323232323232326, "grad_norm": 5.153116703033447, "learning_rate": 4.515323496027242e-05, "loss": 0.4368, "step": 480 }, { "epoch": 0.32996632996632996, "grad_norm": 6.269533157348633, "learning_rate": 4.503972758229285e-05, "loss": 
0.4467, "step": 490 }, { "epoch": 0.3367003367003367, "grad_norm": 6.478705406188965, "learning_rate": 4.492622020431328e-05, "loss": 0.4453, "step": 500 }, { "epoch": 0.3434343434343434, "grad_norm": 5.625921249389648, "learning_rate": 4.481271282633372e-05, "loss": 0.3846, "step": 510 }, { "epoch": 0.3501683501683502, "grad_norm": 5.453153133392334, "learning_rate": 4.469920544835415e-05, "loss": 0.3811, "step": 520 }, { "epoch": 0.3569023569023569, "grad_norm": 6.992231369018555, "learning_rate": 4.458569807037458e-05, "loss": 0.4318, "step": 530 }, { "epoch": 0.36363636363636365, "grad_norm": 5.616722583770752, "learning_rate": 4.447219069239501e-05, "loss": 0.3774, "step": 540 }, { "epoch": 0.37037037037037035, "grad_norm": 4.499491214752197, "learning_rate": 4.435868331441544e-05, "loss": 0.4302, "step": 550 }, { "epoch": 0.3771043771043771, "grad_norm": 5.019254207611084, "learning_rate": 4.424517593643587e-05, "loss": 0.405, "step": 560 }, { "epoch": 0.3838383838383838, "grad_norm": 4.1655144691467285, "learning_rate": 4.41316685584563e-05, "loss": 0.3785, "step": 570 }, { "epoch": 0.39057239057239057, "grad_norm": 4.204577922821045, "learning_rate": 4.401816118047674e-05, "loss": 0.3917, "step": 580 }, { "epoch": 0.39730639730639733, "grad_norm": 5.208505153656006, "learning_rate": 4.3904653802497166e-05, "loss": 0.4067, "step": 590 }, { "epoch": 0.40404040404040403, "grad_norm": 5.3686418533325195, "learning_rate": 4.37911464245176e-05, "loss": 0.3595, "step": 600 }, { "epoch": 0.4107744107744108, "grad_norm": 4.19749641418457, "learning_rate": 4.3677639046538026e-05, "loss": 0.4, "step": 610 }, { "epoch": 0.4175084175084175, "grad_norm": 7.22583532333374, "learning_rate": 4.356413166855846e-05, "loss": 0.3517, "step": 620 }, { "epoch": 0.42424242424242425, "grad_norm": 5.825573921203613, "learning_rate": 4.345062429057889e-05, "loss": 0.4059, "step": 630 }, { "epoch": 0.43097643097643096, "grad_norm": 5.724638938903809, "learning_rate": 4.333711691259932e-05, "loss": 0.3847, "step": 640 }, { "epoch": 0.4377104377104377, "grad_norm": 7.2732954025268555, "learning_rate": 4.3223609534619754e-05, "loss": 0.3559, "step": 650 }, { "epoch": 0.4444444444444444, "grad_norm": 5.488597393035889, "learning_rate": 4.311010215664019e-05, "loss": 0.3833, "step": 660 }, { "epoch": 0.4511784511784512, "grad_norm": 6.655267715454102, "learning_rate": 4.2996594778660615e-05, "loss": 0.3468, "step": 670 }, { "epoch": 0.45791245791245794, "grad_norm": 6.509310245513916, "learning_rate": 4.288308740068105e-05, "loss": 0.3865, "step": 680 }, { "epoch": 0.46464646464646464, "grad_norm": 4.374180793762207, "learning_rate": 4.2769580022701476e-05, "loss": 0.3617, "step": 690 }, { "epoch": 0.4713804713804714, "grad_norm": 3.9861130714416504, "learning_rate": 4.265607264472191e-05, "loss": 0.3495, "step": 700 }, { "epoch": 0.4781144781144781, "grad_norm": 4.877681732177734, "learning_rate": 4.254256526674234e-05, "loss": 0.3662, "step": 710 }, { "epoch": 0.48484848484848486, "grad_norm": 4.442035675048828, "learning_rate": 4.242905788876278e-05, "loss": 0.3842, "step": 720 }, { "epoch": 0.49158249158249157, "grad_norm": 4.463146209716797, "learning_rate": 4.2315550510783204e-05, "loss": 0.3786, "step": 730 }, { "epoch": 0.4983164983164983, "grad_norm": 5.221556186676025, "learning_rate": 4.220204313280364e-05, "loss": 0.3684, "step": 740 }, { "epoch": 0.5050505050505051, "grad_norm": 5.097838878631592, "learning_rate": 4.2088535754824064e-05, "loss": 0.3175, "step": 750 }, { "epoch": 0.5117845117845118, 
"grad_norm": 4.413094997406006, "learning_rate": 4.19750283768445e-05, "loss": 0.3759, "step": 760 }, { "epoch": 0.5185185185185185, "grad_norm": 4.710460662841797, "learning_rate": 4.186152099886493e-05, "loss": 0.3746, "step": 770 }, { "epoch": 0.5252525252525253, "grad_norm": 4.644078254699707, "learning_rate": 4.1748013620885365e-05, "loss": 0.3613, "step": 780 }, { "epoch": 0.531986531986532, "grad_norm": 3.328462839126587, "learning_rate": 4.163450624290579e-05, "loss": 0.3326, "step": 790 }, { "epoch": 0.5387205387205387, "grad_norm": 4.52667236328125, "learning_rate": 4.1520998864926226e-05, "loss": 0.3349, "step": 800 }, { "epoch": 0.5387205387205387, "eval_loss": 0.49282562732696533, "eval_runtime": 1123.4733, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.079, "eval_wer": 0.5612186625549654, "step": 800 }, { "epoch": 0.5454545454545454, "grad_norm": 4.9449262619018555, "learning_rate": 4.140749148694665e-05, "loss": 0.3366, "step": 810 }, { "epoch": 0.5521885521885522, "grad_norm": 5.562499523162842, "learning_rate": 4.1293984108967086e-05, "loss": 0.3781, "step": 820 }, { "epoch": 0.5589225589225589, "grad_norm": 4.4809250831604, "learning_rate": 4.118047673098751e-05, "loss": 0.3607, "step": 830 }, { "epoch": 0.5656565656565656, "grad_norm": 4.371147155761719, "learning_rate": 4.106696935300795e-05, "loss": 0.3082, "step": 840 }, { "epoch": 0.5723905723905723, "grad_norm": 5.5584893226623535, "learning_rate": 4.095346197502838e-05, "loss": 0.3384, "step": 850 }, { "epoch": 0.5791245791245792, "grad_norm": 4.966277599334717, "learning_rate": 4.0839954597048814e-05, "loss": 0.3328, "step": 860 }, { "epoch": 0.5858585858585859, "grad_norm": 3.8009321689605713, "learning_rate": 4.072644721906924e-05, "loss": 0.3201, "step": 870 }, { "epoch": 0.5925925925925926, "grad_norm": 5.3277106285095215, "learning_rate": 4.0612939841089675e-05, "loss": 0.3345, "step": 880 }, { "epoch": 0.5993265993265994, "grad_norm": 4.464631080627441, "learning_rate": 4.04994324631101e-05, "loss": 0.3081, "step": 890 }, { "epoch": 0.6060606060606061, "grad_norm": 4.56332540512085, "learning_rate": 4.0385925085130536e-05, "loss": 0.3643, "step": 900 }, { "epoch": 0.6127946127946128, "grad_norm": 4.806687831878662, "learning_rate": 4.027241770715097e-05, "loss": 0.3662, "step": 910 }, { "epoch": 0.6195286195286195, "grad_norm": 5.562252998352051, "learning_rate": 4.01589103291714e-05, "loss": 0.3436, "step": 920 }, { "epoch": 0.6262626262626263, "grad_norm": 4.167670249938965, "learning_rate": 4.004540295119183e-05, "loss": 0.3238, "step": 930 }, { "epoch": 0.632996632996633, "grad_norm": 4.628272533416748, "learning_rate": 3.993189557321226e-05, "loss": 0.3351, "step": 940 }, { "epoch": 0.6397306397306397, "grad_norm": 6.1379828453063965, "learning_rate": 3.981838819523269e-05, "loss": 0.3199, "step": 950 }, { "epoch": 0.6464646464646465, "grad_norm": 4.196822643280029, "learning_rate": 3.970488081725312e-05, "loss": 0.3056, "step": 960 }, { "epoch": 0.6531986531986532, "grad_norm": 5.613431930541992, "learning_rate": 3.959137343927356e-05, "loss": 0.3292, "step": 970 }, { "epoch": 0.6599326599326599, "grad_norm": 4.543855667114258, "learning_rate": 3.9477866061293985e-05, "loss": 0.3001, "step": 980 }, { "epoch": 0.6666666666666666, "grad_norm": 3.637694835662842, "learning_rate": 3.936435868331442e-05, "loss": 0.2602, "step": 990 }, { "epoch": 0.6734006734006734, "grad_norm": 6.640063285827637, "learning_rate": 3.9250851305334845e-05, "loss": 0.3195, "step": 1000 }, { "epoch": 
0.6801346801346801, "grad_norm": 4.616398334503174, "learning_rate": 3.913734392735528e-05, "loss": 0.3157, "step": 1010 }, { "epoch": 0.6868686868686869, "grad_norm": 5.17544412612915, "learning_rate": 3.9023836549375706e-05, "loss": 0.3079, "step": 1020 }, { "epoch": 0.6936026936026936, "grad_norm": 5.558164596557617, "learning_rate": 3.891032917139614e-05, "loss": 0.3093, "step": 1030 }, { "epoch": 0.7003367003367004, "grad_norm": 4.862564563751221, "learning_rate": 3.879682179341657e-05, "loss": 0.3255, "step": 1040 }, { "epoch": 0.7070707070707071, "grad_norm": 3.790825605392456, "learning_rate": 3.868331441543701e-05, "loss": 0.3331, "step": 1050 }, { "epoch": 0.7138047138047138, "grad_norm": 4.081621170043945, "learning_rate": 3.8569807037457434e-05, "loss": 0.275, "step": 1060 }, { "epoch": 0.7205387205387206, "grad_norm": 3.9744527339935303, "learning_rate": 3.845629965947787e-05, "loss": 0.2978, "step": 1070 }, { "epoch": 0.7272727272727273, "grad_norm": 4.056180953979492, "learning_rate": 3.8342792281498294e-05, "loss": 0.2912, "step": 1080 }, { "epoch": 0.734006734006734, "grad_norm": 5.50215482711792, "learning_rate": 3.822928490351873e-05, "loss": 0.2738, "step": 1090 }, { "epoch": 0.7407407407407407, "grad_norm": 6.039124488830566, "learning_rate": 3.811577752553916e-05, "loss": 0.2912, "step": 1100 }, { "epoch": 0.7474747474747475, "grad_norm": 4.475265979766846, "learning_rate": 3.8002270147559595e-05, "loss": 0.3059, "step": 1110 }, { "epoch": 0.7542087542087542, "grad_norm": 3.8151988983154297, "learning_rate": 3.788876276958002e-05, "loss": 0.2865, "step": 1120 }, { "epoch": 0.7609427609427609, "grad_norm": 4.704629898071289, "learning_rate": 3.7775255391600456e-05, "loss": 0.2938, "step": 1130 }, { "epoch": 0.7676767676767676, "grad_norm": 4.103381633758545, "learning_rate": 3.766174801362088e-05, "loss": 0.277, "step": 1140 }, { "epoch": 0.7744107744107744, "grad_norm": 5.573786735534668, "learning_rate": 3.754824063564132e-05, "loss": 0.3232, "step": 1150 }, { "epoch": 0.7811447811447811, "grad_norm": 3.373387575149536, "learning_rate": 3.743473325766175e-05, "loss": 0.3285, "step": 1160 }, { "epoch": 0.7878787878787878, "grad_norm": 3.7531933784484863, "learning_rate": 3.7321225879682184e-05, "loss": 0.2596, "step": 1170 }, { "epoch": 0.7946127946127947, "grad_norm": 5.006664752960205, "learning_rate": 3.720771850170261e-05, "loss": 0.2919, "step": 1180 }, { "epoch": 0.8013468013468014, "grad_norm": 6.7509307861328125, "learning_rate": 3.7094211123723045e-05, "loss": 0.2947, "step": 1190 }, { "epoch": 0.8080808080808081, "grad_norm": 3.8846304416656494, "learning_rate": 3.698070374574347e-05, "loss": 0.2906, "step": 1200 }, { "epoch": 0.8080808080808081, "eval_loss": 0.4824906289577484, "eval_runtime": 688.719, "eval_samples_per_second": 2.059, "eval_steps_per_second": 0.129, "eval_wer": 0.5195876877391354, "step": 1200 }, { "epoch": 0.8148148148148148, "grad_norm": 4.751924514770508, "learning_rate": 3.6867196367763905e-05, "loss": 0.2785, "step": 1210 }, { "epoch": 0.8215488215488216, "grad_norm": 3.7664754390716553, "learning_rate": 3.675368898978433e-05, "loss": 0.3289, "step": 1220 }, { "epoch": 0.8282828282828283, "grad_norm": 5.232487201690674, "learning_rate": 3.6640181611804766e-05, "loss": 0.3174, "step": 1230 }, { "epoch": 0.835016835016835, "grad_norm": 2.880322217941284, "learning_rate": 3.65266742338252e-05, "loss": 0.2686, "step": 1240 }, { "epoch": 0.8417508417508418, "grad_norm": 4.3191609382629395, "learning_rate": 3.641316685584563e-05, "loss": 
0.3059, "step": 1250 }, { "epoch": 0.8484848484848485, "grad_norm": 3.35304594039917, "learning_rate": 3.629965947786606e-05, "loss": 0.2693, "step": 1260 }, { "epoch": 0.8552188552188552, "grad_norm": 3.989720582962036, "learning_rate": 3.6186152099886494e-05, "loss": 0.2748, "step": 1270 }, { "epoch": 0.8619528619528619, "grad_norm": 2.795743942260742, "learning_rate": 3.607264472190692e-05, "loss": 0.2608, "step": 1280 }, { "epoch": 0.8686868686868687, "grad_norm": 5.1076226234436035, "learning_rate": 3.5959137343927354e-05, "loss": 0.2979, "step": 1290 }, { "epoch": 0.8754208754208754, "grad_norm": 3.131528854370117, "learning_rate": 3.584562996594779e-05, "loss": 0.2865, "step": 1300 }, { "epoch": 0.8821548821548821, "grad_norm": 6.658942699432373, "learning_rate": 3.573212258796822e-05, "loss": 0.2727, "step": 1310 }, { "epoch": 0.8888888888888888, "grad_norm": 4.044928073883057, "learning_rate": 3.561861520998865e-05, "loss": 0.2913, "step": 1320 }, { "epoch": 0.8956228956228957, "grad_norm": 4.87237024307251, "learning_rate": 3.550510783200908e-05, "loss": 0.2485, "step": 1330 }, { "epoch": 0.9023569023569024, "grad_norm": 3.8377342224121094, "learning_rate": 3.539160045402951e-05, "loss": 0.2707, "step": 1340 }, { "epoch": 0.9090909090909091, "grad_norm": 3.848212957382202, "learning_rate": 3.527809307604994e-05, "loss": 0.2375, "step": 1350 }, { "epoch": 0.9158249158249159, "grad_norm": 3.686363697052002, "learning_rate": 3.5164585698070377e-05, "loss": 0.2597, "step": 1360 }, { "epoch": 0.9225589225589226, "grad_norm": 4.444821834564209, "learning_rate": 3.505107832009081e-05, "loss": 0.2599, "step": 1370 }, { "epoch": 0.9292929292929293, "grad_norm": 4.101839542388916, "learning_rate": 3.493757094211124e-05, "loss": 0.297, "step": 1380 }, { "epoch": 0.936026936026936, "grad_norm": 4.912603855133057, "learning_rate": 3.482406356413167e-05, "loss": 0.2557, "step": 1390 }, { "epoch": 0.9427609427609428, "grad_norm": 4.1229248046875, "learning_rate": 3.47105561861521e-05, "loss": 0.271, "step": 1400 }, { "epoch": 0.9494949494949495, "grad_norm": 3.668956756591797, "learning_rate": 3.459704880817253e-05, "loss": 0.286, "step": 1410 }, { "epoch": 0.9562289562289562, "grad_norm": 5.052644729614258, "learning_rate": 3.448354143019296e-05, "loss": 0.2841, "step": 1420 }, { "epoch": 0.9629629629629629, "grad_norm": 3.9958231449127197, "learning_rate": 3.43700340522134e-05, "loss": 0.2473, "step": 1430 }, { "epoch": 0.9696969696969697, "grad_norm": 4.527822971343994, "learning_rate": 3.4256526674233826e-05, "loss": 0.2867, "step": 1440 }, { "epoch": 0.9764309764309764, "grad_norm": 3.6779627799987793, "learning_rate": 3.414301929625426e-05, "loss": 0.2547, "step": 1450 }, { "epoch": 0.9831649831649831, "grad_norm": 4.0302581787109375, "learning_rate": 3.4029511918274686e-05, "loss": 0.2697, "step": 1460 }, { "epoch": 0.98989898989899, "grad_norm": 3.447392463684082, "learning_rate": 3.391600454029512e-05, "loss": 0.2809, "step": 1470 }, { "epoch": 0.9966329966329966, "grad_norm": 3.6442668437957764, "learning_rate": 3.380249716231555e-05, "loss": 0.2705, "step": 1480 }, { "epoch": 1.0033670033670035, "grad_norm": 3.358112335205078, "learning_rate": 3.368898978433598e-05, "loss": 0.2206, "step": 1490 }, { "epoch": 1.0101010101010102, "grad_norm": 2.5414748191833496, "learning_rate": 3.3575482406356414e-05, "loss": 0.22, "step": 1500 }, { "epoch": 1.0168350168350169, "grad_norm": 3.4823262691497803, "learning_rate": 3.346197502837685e-05, "loss": 0.1841, "step": 1510 }, { "epoch": 
1.0235690235690236, "grad_norm": 3.468315362930298, "learning_rate": 3.3348467650397275e-05, "loss": 0.2136, "step": 1520 }, { "epoch": 1.0303030303030303, "grad_norm": 3.683201551437378, "learning_rate": 3.323496027241771e-05, "loss": 0.1949, "step": 1530 }, { "epoch": 1.037037037037037, "grad_norm": 2.5732924938201904, "learning_rate": 3.3121452894438135e-05, "loss": 0.2017, "step": 1540 }, { "epoch": 1.0437710437710437, "grad_norm": 4.662359714508057, "learning_rate": 3.300794551645857e-05, "loss": 0.224, "step": 1550 }, { "epoch": 1.0505050505050506, "grad_norm": 3.25209379196167, "learning_rate": 3.2894438138479e-05, "loss": 0.1729, "step": 1560 }, { "epoch": 1.0572390572390573, "grad_norm": 3.497758388519287, "learning_rate": 3.2780930760499436e-05, "loss": 0.2176, "step": 1570 }, { "epoch": 1.063973063973064, "grad_norm": 3.802095890045166, "learning_rate": 3.2667423382519863e-05, "loss": 0.1802, "step": 1580 }, { "epoch": 1.0707070707070707, "grad_norm": 3.487844705581665, "learning_rate": 3.25539160045403e-05, "loss": 0.2105, "step": 1590 }, { "epoch": 1.0774410774410774, "grad_norm": 2.5473880767822266, "learning_rate": 3.2440408626560724e-05, "loss": 0.1848, "step": 1600 }, { "epoch": 1.0774410774410774, "eval_loss": 0.46825486421585083, "eval_runtime": 776.6351, "eval_samples_per_second": 1.826, "eval_steps_per_second": 0.115, "eval_wer": 0.502912455028268, "step": 1600 }, { "epoch": 1.0841750841750841, "grad_norm": 3.390531063079834, "learning_rate": 3.232690124858116e-05, "loss": 0.205, "step": 1610 }, { "epoch": 1.0909090909090908, "grad_norm": 3.4687283039093018, "learning_rate": 3.221339387060159e-05, "loss": 0.1893, "step": 1620 }, { "epoch": 1.0976430976430978, "grad_norm": 4.269082069396973, "learning_rate": 3.2099886492622025e-05, "loss": 0.1824, "step": 1630 }, { "epoch": 1.1043771043771045, "grad_norm": 4.165797233581543, "learning_rate": 3.198637911464245e-05, "loss": 0.1989, "step": 1640 }, { "epoch": 1.1111111111111112, "grad_norm": 3.5516631603240967, "learning_rate": 3.1872871736662886e-05, "loss": 0.1872, "step": 1650 }, { "epoch": 1.1178451178451179, "grad_norm": 3.6036102771759033, "learning_rate": 3.175936435868331e-05, "loss": 0.2143, "step": 1660 }, { "epoch": 1.1245791245791246, "grad_norm": 3.6270053386688232, "learning_rate": 3.1645856980703746e-05, "loss": 0.1806, "step": 1670 }, { "epoch": 1.1313131313131313, "grad_norm": 3.623009204864502, "learning_rate": 3.153234960272417e-05, "loss": 0.1964, "step": 1680 }, { "epoch": 1.138047138047138, "grad_norm": 3.477240800857544, "learning_rate": 3.141884222474461e-05, "loss": 0.2126, "step": 1690 }, { "epoch": 1.144781144781145, "grad_norm": 4.374250411987305, "learning_rate": 3.130533484676504e-05, "loss": 0.2001, "step": 1700 }, { "epoch": 1.1515151515151516, "grad_norm": 3.302889347076416, "learning_rate": 3.1191827468785474e-05, "loss": 0.1732, "step": 1710 }, { "epoch": 1.1582491582491583, "grad_norm": 3.2889134883880615, "learning_rate": 3.10783200908059e-05, "loss": 0.2031, "step": 1720 }, { "epoch": 1.164983164983165, "grad_norm": 3.4270858764648438, "learning_rate": 3.0964812712826335e-05, "loss": 0.1947, "step": 1730 }, { "epoch": 1.1717171717171717, "grad_norm": 2.7580225467681885, "learning_rate": 3.085130533484676e-05, "loss": 0.1871, "step": 1740 }, { "epoch": 1.1784511784511784, "grad_norm": 3.1256375312805176, "learning_rate": 3.0737797956867195e-05, "loss": 0.2017, "step": 1750 }, { "epoch": 1.1851851851851851, "grad_norm": 2.583787441253662, "learning_rate": 3.062429057888763e-05, "loss": 
0.2098, "step": 1760 }, { "epoch": 1.1919191919191918, "grad_norm": 3.7047979831695557, "learning_rate": 3.051078320090806e-05, "loss": 0.2023, "step": 1770 }, { "epoch": 1.1986531986531987, "grad_norm": 2.448273181915283, "learning_rate": 3.039727582292849e-05, "loss": 0.1836, "step": 1780 }, { "epoch": 1.2053872053872055, "grad_norm": 2.4795892238616943, "learning_rate": 3.0283768444948923e-05, "loss": 0.1904, "step": 1790 }, { "epoch": 1.2121212121212122, "grad_norm": 4.302123546600342, "learning_rate": 3.0170261066969354e-05, "loss": 0.1787, "step": 1800 }, { "epoch": 1.2188552188552189, "grad_norm": 3.4514520168304443, "learning_rate": 3.0056753688989787e-05, "loss": 0.1779, "step": 1810 }, { "epoch": 1.2255892255892256, "grad_norm": 4.459456443786621, "learning_rate": 2.9943246311010214e-05, "loss": 0.1958, "step": 1820 }, { "epoch": 1.2323232323232323, "grad_norm": 2.8016040325164795, "learning_rate": 2.9829738933030648e-05, "loss": 0.2076, "step": 1830 }, { "epoch": 1.239057239057239, "grad_norm": 2.360806941986084, "learning_rate": 2.9716231555051078e-05, "loss": 0.1778, "step": 1840 }, { "epoch": 1.2457912457912457, "grad_norm": 3.3838906288146973, "learning_rate": 2.9602724177071512e-05, "loss": 0.2019, "step": 1850 }, { "epoch": 1.2525252525252526, "grad_norm": 3.9138290882110596, "learning_rate": 2.948921679909194e-05, "loss": 0.1981, "step": 1860 }, { "epoch": 1.2592592592592593, "grad_norm": 3.0798556804656982, "learning_rate": 2.9375709421112372e-05, "loss": 0.1718, "step": 1870 }, { "epoch": 1.265993265993266, "grad_norm": 5.293337345123291, "learning_rate": 2.9262202043132803e-05, "loss": 0.1801, "step": 1880 }, { "epoch": 1.2727272727272727, "grad_norm": 3.36914324760437, "learning_rate": 2.9148694665153236e-05, "loss": 0.2143, "step": 1890 }, { "epoch": 1.2794612794612794, "grad_norm": 3.070244550704956, "learning_rate": 2.9035187287173667e-05, "loss": 0.1844, "step": 1900 }, { "epoch": 1.2861952861952861, "grad_norm": 2.8654966354370117, "learning_rate": 2.89216799091941e-05, "loss": 0.1774, "step": 1910 }, { "epoch": 1.2929292929292928, "grad_norm": 3.248065710067749, "learning_rate": 2.8808172531214527e-05, "loss": 0.1649, "step": 1920 }, { "epoch": 1.2996632996632997, "grad_norm": 3.4403655529022217, "learning_rate": 2.869466515323496e-05, "loss": 0.1858, "step": 1930 }, { "epoch": 1.3063973063973064, "grad_norm": 2.928788661956787, "learning_rate": 2.858115777525539e-05, "loss": 0.1986, "step": 1940 }, { "epoch": 1.3131313131313131, "grad_norm": 2.984104871749878, "learning_rate": 2.8467650397275825e-05, "loss": 0.1874, "step": 1950 }, { "epoch": 1.3198653198653199, "grad_norm": 3.16933536529541, "learning_rate": 2.8354143019296252e-05, "loss": 0.1954, "step": 1960 }, { "epoch": 1.3265993265993266, "grad_norm": 3.3342158794403076, "learning_rate": 2.8240635641316686e-05, "loss": 0.1654, "step": 1970 }, { "epoch": 1.3333333333333333, "grad_norm": 3.3244802951812744, "learning_rate": 2.8127128263337116e-05, "loss": 0.1461, "step": 1980 }, { "epoch": 1.34006734006734, "grad_norm": 3.566857099533081, "learning_rate": 2.801362088535755e-05, "loss": 0.1732, "step": 1990 }, { "epoch": 1.3468013468013469, "grad_norm": 2.2419066429138184, "learning_rate": 2.790011350737798e-05, "loss": 0.1612, "step": 2000 }, { "epoch": 1.3468013468013469, "eval_loss": 0.470431387424469, "eval_runtime": 778.9928, "eval_samples_per_second": 1.82, "eval_steps_per_second": 0.114, "eval_wer": 0.49309005767803094, "step": 2000 }, { "epoch": 1.3535353535353536, "grad_norm": 3.476229429244995, 
"learning_rate": 2.7786606129398414e-05, "loss": 0.1884, "step": 2010 }, { "epoch": 1.3602693602693603, "grad_norm": 2.509948968887329, "learning_rate": 2.767309875141884e-05, "loss": 0.1647, "step": 2020 }, { "epoch": 1.367003367003367, "grad_norm": 3.446333408355713, "learning_rate": 2.7559591373439274e-05, "loss": 0.1882, "step": 2030 }, { "epoch": 1.3737373737373737, "grad_norm": 4.690558910369873, "learning_rate": 2.7446083995459704e-05, "loss": 0.1772, "step": 2040 }, { "epoch": 1.3804713804713804, "grad_norm": 3.1924571990966797, "learning_rate": 2.7332576617480138e-05, "loss": 0.1598, "step": 2050 }, { "epoch": 1.387205387205387, "grad_norm": 3.3819077014923096, "learning_rate": 2.7219069239500565e-05, "loss": 0.1778, "step": 2060 }, { "epoch": 1.393939393939394, "grad_norm": 3.4989449977874756, "learning_rate": 2.7105561861521002e-05, "loss": 0.1613, "step": 2070 }, { "epoch": 1.4006734006734007, "grad_norm": 3.041142225265503, "learning_rate": 2.699205448354143e-05, "loss": 0.1602, "step": 2080 }, { "epoch": 1.4074074074074074, "grad_norm": 2.791797399520874, "learning_rate": 2.6878547105561863e-05, "loss": 0.1847, "step": 2090 }, { "epoch": 1.4141414141414141, "grad_norm": 3.252044916152954, "learning_rate": 2.6765039727582293e-05, "loss": 0.1687, "step": 2100 }, { "epoch": 1.4208754208754208, "grad_norm": 4.116684436798096, "learning_rate": 2.6651532349602727e-05, "loss": 0.1839, "step": 2110 }, { "epoch": 1.4276094276094276, "grad_norm": 2.723188638687134, "learning_rate": 2.6538024971623154e-05, "loss": 0.1746, "step": 2120 }, { "epoch": 1.4343434343434343, "grad_norm": 2.7226133346557617, "learning_rate": 2.6424517593643587e-05, "loss": 0.1621, "step": 2130 }, { "epoch": 1.4410774410774412, "grad_norm": 4.077718734741211, "learning_rate": 2.6311010215664018e-05, "loss": 0.1765, "step": 2140 }, { "epoch": 1.4478114478114479, "grad_norm": 3.0060672760009766, "learning_rate": 2.619750283768445e-05, "loss": 0.1658, "step": 2150 }, { "epoch": 1.4545454545454546, "grad_norm": 3.187156915664673, "learning_rate": 2.6083995459704878e-05, "loss": 0.1722, "step": 2160 }, { "epoch": 1.4612794612794613, "grad_norm": 2.6665992736816406, "learning_rate": 2.5970488081725315e-05, "loss": 0.1879, "step": 2170 }, { "epoch": 1.468013468013468, "grad_norm": 3.637021541595459, "learning_rate": 2.5856980703745742e-05, "loss": 0.1657, "step": 2180 }, { "epoch": 1.4747474747474747, "grad_norm": 3.3992161750793457, "learning_rate": 2.5743473325766176e-05, "loss": 0.2039, "step": 2190 }, { "epoch": 1.4814814814814814, "grad_norm": 4.711835861206055, "learning_rate": 2.5629965947786606e-05, "loss": 0.1943, "step": 2200 }, { "epoch": 1.4882154882154883, "grad_norm": 2.712679624557495, "learning_rate": 2.551645856980704e-05, "loss": 0.1671, "step": 2210 }, { "epoch": 1.494949494949495, "grad_norm": 3.3363306522369385, "learning_rate": 2.5402951191827467e-05, "loss": 0.1669, "step": 2220 }, { "epoch": 1.5016835016835017, "grad_norm": 3.961500406265259, "learning_rate": 2.52894438138479e-05, "loss": 0.1478, "step": 2230 }, { "epoch": 1.5084175084175084, "grad_norm": 3.390343189239502, "learning_rate": 2.517593643586833e-05, "loss": 0.1744, "step": 2240 }, { "epoch": 1.5151515151515151, "grad_norm": 3.3122527599334717, "learning_rate": 2.5062429057888764e-05, "loss": 0.182, "step": 2250 }, { "epoch": 1.5218855218855218, "grad_norm": 3.9705302715301514, "learning_rate": 2.4948921679909195e-05, "loss": 0.1545, "step": 2260 }, { "epoch": 1.5286195286195285, "grad_norm": 3.5639703273773193, "learning_rate": 
2.483541430192963e-05, "loss": 0.1643, "step": 2270 }, { "epoch": 1.5353535353535355, "grad_norm": 3.0987420082092285, "learning_rate": 2.472190692395006e-05, "loss": 0.1845, "step": 2280 }, { "epoch": 1.542087542087542, "grad_norm": 3.1441290378570557, "learning_rate": 2.460839954597049e-05, "loss": 0.1515, "step": 2290 }, { "epoch": 1.5488215488215489, "grad_norm": 3.7302119731903076, "learning_rate": 2.4494892167990923e-05, "loss": 0.1838, "step": 2300 }, { "epoch": 1.5555555555555556, "grad_norm": 2.877547025680542, "learning_rate": 2.4381384790011353e-05, "loss": 0.1837, "step": 2310 }, { "epoch": 1.5622895622895623, "grad_norm": 3.0840272903442383, "learning_rate": 2.4267877412031783e-05, "loss": 0.17, "step": 2320 }, { "epoch": 1.569023569023569, "grad_norm": 2.3135063648223877, "learning_rate": 2.4154370034052214e-05, "loss": 0.1524, "step": 2330 }, { "epoch": 1.5757575757575757, "grad_norm": 5.435102939605713, "learning_rate": 2.4040862656072647e-05, "loss": 0.1631, "step": 2340 }, { "epoch": 1.5824915824915826, "grad_norm": 2.6250736713409424, "learning_rate": 2.3927355278093077e-05, "loss": 0.1748, "step": 2350 }, { "epoch": 1.589225589225589, "grad_norm": 3.478433132171631, "learning_rate": 2.3813847900113508e-05, "loss": 0.1557, "step": 2360 }, { "epoch": 1.595959595959596, "grad_norm": 2.924372673034668, "learning_rate": 2.370034052213394e-05, "loss": 0.1647, "step": 2370 }, { "epoch": 1.6026936026936027, "grad_norm": 2.708841562271118, "learning_rate": 2.3586833144154372e-05, "loss": 0.2072, "step": 2380 }, { "epoch": 1.6094276094276094, "grad_norm": 3.2418808937072754, "learning_rate": 2.3473325766174802e-05, "loss": 0.1705, "step": 2390 }, { "epoch": 1.6161616161616161, "grad_norm": 2.908341884613037, "learning_rate": 2.3359818388195236e-05, "loss": 0.192, "step": 2400 }, { "epoch": 1.6161616161616161, "eval_loss": 0.4698619246482849, "eval_runtime": 721.5021, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.123, "eval_wer": 0.49100565358917253, "step": 2400 }, { "epoch": 1.6228956228956228, "grad_norm": 2.884115695953369, "learning_rate": 2.3246311010215666e-05, "loss": 0.17, "step": 2410 }, { "epoch": 1.6296296296296298, "grad_norm": 3.3428990840911865, "learning_rate": 2.3132803632236096e-05, "loss": 0.1576, "step": 2420 }, { "epoch": 1.6363636363636362, "grad_norm": 2.6130261421203613, "learning_rate": 2.3019296254256527e-05, "loss": 0.161, "step": 2430 }, { "epoch": 1.6430976430976432, "grad_norm": 2.56199049949646, "learning_rate": 2.290578887627696e-05, "loss": 0.1741, "step": 2440 }, { "epoch": 1.6498316498316499, "grad_norm": 3.0649795532226562, "learning_rate": 2.279228149829739e-05, "loss": 0.1662, "step": 2450 }, { "epoch": 1.6565656565656566, "grad_norm": 2.2965986728668213, "learning_rate": 2.267877412031782e-05, "loss": 0.1717, "step": 2460 }, { "epoch": 1.6632996632996633, "grad_norm": 3.121750831604004, "learning_rate": 2.2565266742338255e-05, "loss": 0.1577, "step": 2470 }, { "epoch": 1.67003367003367, "grad_norm": 2.052502393722534, "learning_rate": 2.2451759364358685e-05, "loss": 0.1568, "step": 2480 }, { "epoch": 1.676767676767677, "grad_norm": 2.4969053268432617, "learning_rate": 2.2338251986379115e-05, "loss": 0.153, "step": 2490 }, { "epoch": 1.6835016835016834, "grad_norm": 2.811131715774536, "learning_rate": 2.222474460839955e-05, "loss": 0.1527, "step": 2500 }, { "epoch": 1.6902356902356903, "grad_norm": 2.959965229034424, "learning_rate": 2.211123723041998e-05, "loss": 0.1501, "step": 2510 }, { "epoch": 1.696969696969697, 
"grad_norm": 3.3598415851593018, "learning_rate": 2.199772985244041e-05, "loss": 0.1702, "step": 2520 }, { "epoch": 1.7037037037037037, "grad_norm": 3.3592233657836914, "learning_rate": 2.1884222474460843e-05, "loss": 0.1584, "step": 2530 }, { "epoch": 1.7104377104377104, "grad_norm": 3.0574469566345215, "learning_rate": 2.1770715096481273e-05, "loss": 0.1513, "step": 2540 }, { "epoch": 1.7171717171717171, "grad_norm": 2.782938003540039, "learning_rate": 2.1657207718501704e-05, "loss": 0.1646, "step": 2550 }, { "epoch": 1.723905723905724, "grad_norm": 2.9138362407684326, "learning_rate": 2.1543700340522134e-05, "loss": 0.1513, "step": 2560 }, { "epoch": 1.7306397306397305, "grad_norm": 2.8213393688201904, "learning_rate": 2.1430192962542568e-05, "loss": 0.1683, "step": 2570 }, { "epoch": 1.7373737373737375, "grad_norm": 3.486140489578247, "learning_rate": 2.1316685584562998e-05, "loss": 0.1713, "step": 2580 }, { "epoch": 1.7441077441077442, "grad_norm": 2.8046581745147705, "learning_rate": 2.1203178206583428e-05, "loss": 0.1804, "step": 2590 }, { "epoch": 1.7508417508417509, "grad_norm": 2.6458210945129395, "learning_rate": 2.1089670828603862e-05, "loss": 0.165, "step": 2600 }, { "epoch": 1.7575757575757576, "grad_norm": 2.7271742820739746, "learning_rate": 2.0976163450624292e-05, "loss": 0.165, "step": 2610 }, { "epoch": 1.7643097643097643, "grad_norm": 3.7697384357452393, "learning_rate": 2.0862656072644723e-05, "loss": 0.1944, "step": 2620 }, { "epoch": 1.7710437710437712, "grad_norm": 2.6092400550842285, "learning_rate": 2.0749148694665156e-05, "loss": 0.1712, "step": 2630 }, { "epoch": 1.7777777777777777, "grad_norm": 3.9321539402008057, "learning_rate": 2.0635641316685587e-05, "loss": 0.1718, "step": 2640 }, { "epoch": 1.7845117845117846, "grad_norm": 3.0893261432647705, "learning_rate": 2.0522133938706017e-05, "loss": 0.1802, "step": 2650 }, { "epoch": 1.791245791245791, "grad_norm": 3.8314249515533447, "learning_rate": 2.0408626560726447e-05, "loss": 0.1746, "step": 2660 }, { "epoch": 1.797979797979798, "grad_norm": 3.2088515758514404, "learning_rate": 2.029511918274688e-05, "loss": 0.1693, "step": 2670 }, { "epoch": 1.8047138047138047, "grad_norm": 3.136512517929077, "learning_rate": 2.018161180476731e-05, "loss": 0.1773, "step": 2680 }, { "epoch": 1.8114478114478114, "grad_norm": 2.799889326095581, "learning_rate": 2.006810442678774e-05, "loss": 0.146, "step": 2690 }, { "epoch": 1.8181818181818183, "grad_norm": 2.3367459774017334, "learning_rate": 1.9954597048808175e-05, "loss": 0.1561, "step": 2700 }, { "epoch": 1.8249158249158248, "grad_norm": 3.626417636871338, "learning_rate": 1.9841089670828605e-05, "loss": 0.1532, "step": 2710 }, { "epoch": 1.8316498316498318, "grad_norm": 3.480536460876465, "learning_rate": 1.9727582292849036e-05, "loss": 0.1493, "step": 2720 }, { "epoch": 1.8383838383838382, "grad_norm": 2.8837146759033203, "learning_rate": 1.961407491486947e-05, "loss": 0.1475, "step": 2730 }, { "epoch": 1.8451178451178452, "grad_norm": 2.784156322479248, "learning_rate": 1.95005675368899e-05, "loss": 0.1698, "step": 2740 }, { "epoch": 1.8518518518518519, "grad_norm": 2.8038928508758545, "learning_rate": 1.938706015891033e-05, "loss": 0.1686, "step": 2750 }, { "epoch": 1.8585858585858586, "grad_norm": 2.904350996017456, "learning_rate": 1.9273552780930764e-05, "loss": 0.1554, "step": 2760 }, { "epoch": 1.8653198653198653, "grad_norm": 2.736264705657959, "learning_rate": 1.9160045402951194e-05, "loss": 0.1562, "step": 2770 }, { "epoch": 1.872053872053872, "grad_norm": 
3.001835584640503, "learning_rate": 1.9046538024971624e-05, "loss": 0.1567, "step": 2780 }, { "epoch": 1.878787878787879, "grad_norm": 2.6082592010498047, "learning_rate": 1.8933030646992055e-05, "loss": 0.1573, "step": 2790 }, { "epoch": 1.8855218855218854, "grad_norm": 3.1785757541656494, "learning_rate": 1.8819523269012488e-05, "loss": 0.1528, "step": 2800 }, { "epoch": 1.8855218855218854, "eval_loss": 0.46750280261039734, "eval_runtime": 783.2295, "eval_samples_per_second": 1.81, "eval_steps_per_second": 0.114, "eval_wer": 0.48695105933413285, "step": 2800 }, { "epoch": 1.8922558922558923, "grad_norm": 3.254110336303711, "learning_rate": 1.870601589103292e-05, "loss": 0.1492, "step": 2810 }, { "epoch": 1.898989898989899, "grad_norm": 3.617150068283081, "learning_rate": 1.859250851305335e-05, "loss": 0.1524, "step": 2820 }, { "epoch": 1.9057239057239057, "grad_norm": 2.7314984798431396, "learning_rate": 1.8479001135073783e-05, "loss": 0.1824, "step": 2830 }, { "epoch": 1.9124579124579124, "grad_norm": 3.677401304244995, "learning_rate": 1.8365493757094213e-05, "loss": 0.1675, "step": 2840 }, { "epoch": 1.9191919191919191, "grad_norm": 3.4799599647521973, "learning_rate": 1.8251986379114643e-05, "loss": 0.1936, "step": 2850 }, { "epoch": 1.925925925925926, "grad_norm": 2.47420072555542, "learning_rate": 1.8138479001135077e-05, "loss": 0.1624, "step": 2860 }, { "epoch": 1.9326599326599325, "grad_norm": 3.2847509384155273, "learning_rate": 1.8024971623155507e-05, "loss": 0.1867, "step": 2870 }, { "epoch": 1.9393939393939394, "grad_norm": 2.4963037967681885, "learning_rate": 1.7911464245175937e-05, "loss": 0.1498, "step": 2880 }, { "epoch": 1.9461279461279462, "grad_norm": 4.231179714202881, "learning_rate": 1.7797956867196368e-05, "loss": 0.1569, "step": 2890 }, { "epoch": 1.9528619528619529, "grad_norm": 3.305777072906494, "learning_rate": 1.76844494892168e-05, "loss": 0.1647, "step": 2900 }, { "epoch": 1.9595959595959596, "grad_norm": 2.858846664428711, "learning_rate": 1.757094211123723e-05, "loss": 0.1668, "step": 2910 }, { "epoch": 1.9663299663299663, "grad_norm": 2.4449424743652344, "learning_rate": 1.7457434733257662e-05, "loss": 0.1506, "step": 2920 }, { "epoch": 1.9730639730639732, "grad_norm": 2.5614805221557617, "learning_rate": 1.7343927355278096e-05, "loss": 0.1821, "step": 2930 }, { "epoch": 1.9797979797979797, "grad_norm": 3.1182758808135986, "learning_rate": 1.7230419977298526e-05, "loss": 0.1709, "step": 2940 }, { "epoch": 1.9865319865319866, "grad_norm": 3.463992118835449, "learning_rate": 1.7116912599318956e-05, "loss": 0.1479, "step": 2950 }, { "epoch": 1.9932659932659933, "grad_norm": 2.1584393978118896, "learning_rate": 1.700340522133939e-05, "loss": 0.1431, "step": 2960 }, { "epoch": 2.0, "grad_norm": 3.5880393981933594, "learning_rate": 1.688989784335982e-05, "loss": 0.1478, "step": 2970 }, { "epoch": 2.006734006734007, "grad_norm": 1.8367834091186523, "learning_rate": 1.677639046538025e-05, "loss": 0.0986, "step": 2980 }, { "epoch": 2.0134680134680134, "grad_norm": 2.266422748565674, "learning_rate": 1.6662883087400684e-05, "loss": 0.1031, "step": 2990 }, { "epoch": 2.0202020202020203, "grad_norm": 2.440058708190918, "learning_rate": 1.6549375709421114e-05, "loss": 0.0995, "step": 3000 }, { "epoch": 2.026936026936027, "grad_norm": 1.5215619802474976, "learning_rate": 1.6435868331441545e-05, "loss": 0.0937, "step": 3010 }, { "epoch": 2.0336700336700337, "grad_norm": 3.0853044986724854, "learning_rate": 1.6322360953461975e-05, "loss": 0.1028, "step": 3020 }, { 
"epoch": 2.04040404040404, "grad_norm": 2.2898178100585938, "learning_rate": 1.620885357548241e-05, "loss": 0.0971, "step": 3030 }, { "epoch": 2.047138047138047, "grad_norm": 2.6617209911346436, "learning_rate": 1.609534619750284e-05, "loss": 0.1043, "step": 3040 }, { "epoch": 2.053872053872054, "grad_norm": 3.225191593170166, "learning_rate": 1.598183881952327e-05, "loss": 0.0998, "step": 3050 }, { "epoch": 2.0606060606060606, "grad_norm": 2.3820834159851074, "learning_rate": 1.5868331441543703e-05, "loss": 0.083, "step": 3060 }, { "epoch": 2.0673400673400675, "grad_norm": 3.0194029808044434, "learning_rate": 1.5754824063564133e-05, "loss": 0.0909, "step": 3070 }, { "epoch": 2.074074074074074, "grad_norm": 1.5243077278137207, "learning_rate": 1.5641316685584564e-05, "loss": 0.0877, "step": 3080 }, { "epoch": 2.080808080808081, "grad_norm": 2.7908105850219727, "learning_rate": 1.5527809307604997e-05, "loss": 0.1003, "step": 3090 }, { "epoch": 2.0875420875420874, "grad_norm": 2.368906259536743, "learning_rate": 1.5414301929625428e-05, "loss": 0.1013, "step": 3100 }, { "epoch": 2.0942760942760943, "grad_norm": 1.6835886240005493, "learning_rate": 1.5300794551645858e-05, "loss": 0.1012, "step": 3110 }, { "epoch": 2.101010101010101, "grad_norm": 2.943992853164673, "learning_rate": 1.518728717366629e-05, "loss": 0.0949, "step": 3120 }, { "epoch": 2.1077441077441077, "grad_norm": 2.4449052810668945, "learning_rate": 1.5073779795686722e-05, "loss": 0.0967, "step": 3130 }, { "epoch": 2.1144781144781146, "grad_norm": 2.521737813949585, "learning_rate": 1.4960272417707152e-05, "loss": 0.0933, "step": 3140 }, { "epoch": 2.121212121212121, "grad_norm": 2.7859129905700684, "learning_rate": 1.4846765039727584e-05, "loss": 0.1091, "step": 3150 }, { "epoch": 2.127946127946128, "grad_norm": 2.2307798862457275, "learning_rate": 1.4733257661748014e-05, "loss": 0.091, "step": 3160 }, { "epoch": 2.1346801346801345, "grad_norm": 3.108671188354492, "learning_rate": 1.4619750283768446e-05, "loss": 0.1147, "step": 3170 }, { "epoch": 2.1414141414141414, "grad_norm": 2.4862091541290283, "learning_rate": 1.4506242905788878e-05, "loss": 0.0951, "step": 3180 }, { "epoch": 2.148148148148148, "grad_norm": 1.7988865375518799, "learning_rate": 1.4392735527809309e-05, "loss": 0.0963, "step": 3190 }, { "epoch": 2.154882154882155, "grad_norm": 2.6203229427337646, "learning_rate": 1.427922814982974e-05, "loss": 0.0999, "step": 3200 }, { "epoch": 2.154882154882155, "eval_loss": 0.47007495164871216, "eval_runtime": 773.3558, "eval_samples_per_second": 1.834, "eval_steps_per_second": 0.115, "eval_wer": 0.4653360744674776, "step": 3200 }, { "epoch": 2.1616161616161618, "grad_norm": 3.4655826091766357, "learning_rate": 1.4165720771850171e-05, "loss": 0.1302, "step": 3210 }, { "epoch": 2.1683501683501682, "grad_norm": 2.2620227336883545, "learning_rate": 1.4052213393870603e-05, "loss": 0.0917, "step": 3220 }, { "epoch": 2.175084175084175, "grad_norm": 3.4524097442626953, "learning_rate": 1.3938706015891035e-05, "loss": 0.0977, "step": 3230 }, { "epoch": 2.1818181818181817, "grad_norm": 3.0727145671844482, "learning_rate": 1.3825198637911465e-05, "loss": 0.0995, "step": 3240 }, { "epoch": 2.1885521885521886, "grad_norm": 2.59820818901062, "learning_rate": 1.3711691259931897e-05, "loss": 0.0965, "step": 3250 }, { "epoch": 2.1952861952861955, "grad_norm": 1.8692411184310913, "learning_rate": 1.3598183881952328e-05, "loss": 0.0973, "step": 3260 }, { "epoch": 2.202020202020202, "grad_norm": 1.9435840845108032, "learning_rate": 
1.348467650397276e-05, "loss": 0.1007, "step": 3270 }, { "epoch": 2.208754208754209, "grad_norm": 3.7439959049224854, "learning_rate": 1.3371169125993192e-05, "loss": 0.0929, "step": 3280 }, { "epoch": 2.2154882154882154, "grad_norm": 3.1171443462371826, "learning_rate": 1.3257661748013622e-05, "loss": 0.1129, "step": 3290 }, { "epoch": 2.2222222222222223, "grad_norm": 3.671785593032837, "learning_rate": 1.3144154370034054e-05, "loss": 0.1068, "step": 3300 }, { "epoch": 2.228956228956229, "grad_norm": 2.3654842376708984, "learning_rate": 1.3030646992054484e-05, "loss": 0.0851, "step": 3310 }, { "epoch": 2.2356902356902357, "grad_norm": 3.868271589279175, "learning_rate": 1.2917139614074916e-05, "loss": 0.1121, "step": 3320 }, { "epoch": 2.242424242424242, "grad_norm": 2.7278647422790527, "learning_rate": 1.2803632236095348e-05, "loss": 0.1068, "step": 3330 }, { "epoch": 2.249158249158249, "grad_norm": 2.541274309158325, "learning_rate": 1.2690124858115778e-05, "loss": 0.1005, "step": 3340 }, { "epoch": 2.255892255892256, "grad_norm": 2.2592976093292236, "learning_rate": 1.257661748013621e-05, "loss": 0.1014, "step": 3350 }, { "epoch": 2.2626262626262625, "grad_norm": 1.714357614517212, "learning_rate": 1.246311010215664e-05, "loss": 0.0775, "step": 3360 }, { "epoch": 2.2693602693602695, "grad_norm": 3.3454010486602783, "learning_rate": 1.2349602724177071e-05, "loss": 0.1026, "step": 3370 }, { "epoch": 2.276094276094276, "grad_norm": 3.0652363300323486, "learning_rate": 1.2236095346197503e-05, "loss": 0.1227, "step": 3380 }, { "epoch": 2.282828282828283, "grad_norm": 2.409959077835083, "learning_rate": 1.2122587968217935e-05, "loss": 0.1115, "step": 3390 }, { "epoch": 2.28956228956229, "grad_norm": 3.0419325828552246, "learning_rate": 1.2009080590238365e-05, "loss": 0.0942, "step": 3400 }, { "epoch": 2.2962962962962963, "grad_norm": 2.3572564125061035, "learning_rate": 1.1895573212258797e-05, "loss": 0.1034, "step": 3410 }, { "epoch": 2.303030303030303, "grad_norm": 2.0597918033599854, "learning_rate": 1.1782065834279228e-05, "loss": 0.1169, "step": 3420 }, { "epoch": 2.3097643097643097, "grad_norm": 1.6198811531066895, "learning_rate": 1.166855845629966e-05, "loss": 0.1073, "step": 3430 }, { "epoch": 2.3164983164983166, "grad_norm": 2.385390520095825, "learning_rate": 1.1555051078320092e-05, "loss": 0.0913, "step": 3440 }, { "epoch": 2.323232323232323, "grad_norm": 1.6714180707931519, "learning_rate": 1.1441543700340522e-05, "loss": 0.0964, "step": 3450 }, { "epoch": 2.32996632996633, "grad_norm": 2.2347018718719482, "learning_rate": 1.1328036322360954e-05, "loss": 0.0948, "step": 3460 }, { "epoch": 2.3367003367003365, "grad_norm": 1.7842698097229004, "learning_rate": 1.1214528944381384e-05, "loss": 0.0933, "step": 3470 }, { "epoch": 2.3434343434343434, "grad_norm": 2.054187059402466, "learning_rate": 1.1101021566401816e-05, "loss": 0.0967, "step": 3480 }, { "epoch": 2.3501683501683504, "grad_norm": 2.3955607414245605, "learning_rate": 1.0987514188422248e-05, "loss": 0.0789, "step": 3490 }, { "epoch": 2.356902356902357, "grad_norm": 2.6920056343078613, "learning_rate": 1.0874006810442678e-05, "loss": 0.1126, "step": 3500 }, { "epoch": 2.3636363636363638, "grad_norm": 2.0969793796539307, "learning_rate": 1.076049943246311e-05, "loss": 0.0909, "step": 3510 }, { "epoch": 2.3703703703703702, "grad_norm": 2.8712689876556396, "learning_rate": 1.064699205448354e-05, "loss": 0.0948, "step": 3520 }, { "epoch": 2.377104377104377, "grad_norm": 3.084336519241333, "learning_rate": 
1.0533484676503973e-05, "loss": 0.1052, "step": 3530 }, { "epoch": 2.3838383838383836, "grad_norm": 2.8842592239379883, "learning_rate": 1.0419977298524405e-05, "loss": 0.1051, "step": 3540 }, { "epoch": 2.3905723905723906, "grad_norm": 1.8973740339279175, "learning_rate": 1.0306469920544835e-05, "loss": 0.1224, "step": 3550 }, { "epoch": 2.3973063973063975, "grad_norm": 2.898562431335449, "learning_rate": 1.0192962542565267e-05, "loss": 0.117, "step": 3560 }, { "epoch": 2.404040404040404, "grad_norm": 2.5222558975219727, "learning_rate": 1.0079455164585697e-05, "loss": 0.0975, "step": 3570 }, { "epoch": 2.410774410774411, "grad_norm": 2.629905939102173, "learning_rate": 9.96594778660613e-06, "loss": 0.116, "step": 3580 }, { "epoch": 2.4175084175084174, "grad_norm": 2.554290294647217, "learning_rate": 9.852440408626561e-06, "loss": 0.112, "step": 3590 }, { "epoch": 2.4242424242424243, "grad_norm": 1.7490330934524536, "learning_rate": 9.738933030646992e-06, "loss": 0.088, "step": 3600 }, { "epoch": 2.4242424242424243, "eval_loss": 0.4696303904056549, "eval_runtime": 674.2707, "eval_samples_per_second": 2.103, "eval_steps_per_second": 0.132, "eval_wer": 0.46716349723031236, "step": 3600 }, { "epoch": 2.430976430976431, "grad_norm": 3.1101365089416504, "learning_rate": 9.625425652667424e-06, "loss": 0.0916, "step": 3610 }, { "epoch": 2.4377104377104377, "grad_norm": 4.431212425231934, "learning_rate": 9.511918274687854e-06, "loss": 0.119, "step": 3620 }, { "epoch": 2.4444444444444446, "grad_norm": 2.283841848373413, "learning_rate": 9.398410896708286e-06, "loss": 0.0921, "step": 3630 }, { "epoch": 2.451178451178451, "grad_norm": 2.228675127029419, "learning_rate": 9.284903518728718e-06, "loss": 0.0835, "step": 3640 }, { "epoch": 2.457912457912458, "grad_norm": 2.3716728687286377, "learning_rate": 9.171396140749148e-06, "loss": 0.0908, "step": 3650 }, { "epoch": 2.4646464646464645, "grad_norm": 2.604325532913208, "learning_rate": 9.05788876276958e-06, "loss": 0.0971, "step": 3660 }, { "epoch": 2.4713804713804715, "grad_norm": 2.1539206504821777, "learning_rate": 8.944381384790012e-06, "loss": 0.0815, "step": 3670 }, { "epoch": 2.478114478114478, "grad_norm": 2.422910213470459, "learning_rate": 8.830874006810442e-06, "loss": 0.1085, "step": 3680 }, { "epoch": 2.484848484848485, "grad_norm": 2.368211030960083, "learning_rate": 8.717366628830874e-06, "loss": 0.0947, "step": 3690 }, { "epoch": 2.4915824915824913, "grad_norm": 1.828069806098938, "learning_rate": 8.603859250851305e-06, "loss": 0.0867, "step": 3700 }, { "epoch": 2.4983164983164983, "grad_norm": 2.33329176902771, "learning_rate": 8.490351872871737e-06, "loss": 0.0941, "step": 3710 }, { "epoch": 2.505050505050505, "grad_norm": 2.341047525405884, "learning_rate": 8.376844494892169e-06, "loss": 0.0915, "step": 3720 }, { "epoch": 2.5117845117845117, "grad_norm": 1.9225627183914185, "learning_rate": 8.263337116912599e-06, "loss": 0.1044, "step": 3730 }, { "epoch": 2.5185185185185186, "grad_norm": 2.387437105178833, "learning_rate": 8.149829738933031e-06, "loss": 0.0987, "step": 3740 }, { "epoch": 2.525252525252525, "grad_norm": 2.9379942417144775, "learning_rate": 8.036322360953461e-06, "loss": 0.0871, "step": 3750 }, { "epoch": 2.531986531986532, "grad_norm": 3.075242042541504, "learning_rate": 7.922814982973893e-06, "loss": 0.0962, "step": 3760 }, { "epoch": 2.538720538720539, "grad_norm": 3.6734471321105957, "learning_rate": 7.809307604994325e-06, "loss": 0.0844, "step": 3770 }, { "epoch": 2.5454545454545454, "grad_norm": 
2.5898001194000244, "learning_rate": 7.695800227014755e-06, "loss": 0.0996, "step": 3780 }, { "epoch": 2.5521885521885523, "grad_norm": 2.4215145111083984, "learning_rate": 7.5822928490351875e-06, "loss": 0.0857, "step": 3790 }, { "epoch": 2.558922558922559, "grad_norm": 3.2795231342315674, "learning_rate": 7.468785471055619e-06, "loss": 0.1002, "step": 3800 }, { "epoch": 2.5656565656565657, "grad_norm": 3.356985092163086, "learning_rate": 7.35527809307605e-06, "loss": 0.0956, "step": 3810 }, { "epoch": 2.5723905723905722, "grad_norm": 1.5472785234451294, "learning_rate": 7.241770715096481e-06, "loss": 0.0937, "step": 3820 }, { "epoch": 2.579124579124579, "grad_norm": 3.1027777194976807, "learning_rate": 7.128263337116913e-06, "loss": 0.1001, "step": 3830 }, { "epoch": 2.5858585858585856, "grad_norm": 2.8028059005737305, "learning_rate": 7.014755959137344e-06, "loss": 0.1012, "step": 3840 }, { "epoch": 2.5925925925925926, "grad_norm": 3.476177930831909, "learning_rate": 6.901248581157775e-06, "loss": 0.0843, "step": 3850 }, { "epoch": 2.5993265993265995, "grad_norm": 1.6291272640228271, "learning_rate": 6.787741203178206e-06, "loss": 0.1141, "step": 3860 }, { "epoch": 2.606060606060606, "grad_norm": 2.611839532852173, "learning_rate": 6.6742338251986375e-06, "loss": 0.0842, "step": 3870 }, { "epoch": 2.612794612794613, "grad_norm": 2.290695905685425, "learning_rate": 6.5607264472190694e-06, "loss": 0.1037, "step": 3880 }, { "epoch": 2.6195286195286194, "grad_norm": 2.7317962646484375, "learning_rate": 6.447219069239501e-06, "loss": 0.0967, "step": 3890 }, { "epoch": 2.6262626262626263, "grad_norm": 2.8561346530914307, "learning_rate": 6.333711691259932e-06, "loss": 0.1026, "step": 3900 }, { "epoch": 2.6329966329966332, "grad_norm": 1.468044638633728, "learning_rate": 6.220204313280364e-06, "loss": 0.0962, "step": 3910 }, { "epoch": 2.6397306397306397, "grad_norm": 2.2081100940704346, "learning_rate": 6.106696935300795e-06, "loss": 0.1087, "step": 3920 }, { "epoch": 2.6464646464646466, "grad_norm": 1.8171058893203735, "learning_rate": 5.993189557321226e-06, "loss": 0.1031, "step": 3930 }, { "epoch": 2.653198653198653, "grad_norm": 2.296617269515991, "learning_rate": 5.879682179341658e-06, "loss": 0.1066, "step": 3940 }, { "epoch": 2.65993265993266, "grad_norm": 2.02673077583313, "learning_rate": 5.766174801362089e-06, "loss": 0.0939, "step": 3950 }, { "epoch": 2.6666666666666665, "grad_norm": 2.2023749351501465, "learning_rate": 5.65266742338252e-06, "loss": 0.1186, "step": 3960 }, { "epoch": 2.6734006734006734, "grad_norm": 2.8352410793304443, "learning_rate": 5.539160045402951e-06, "loss": 0.0982, "step": 3970 }, { "epoch": 2.68013468013468, "grad_norm": 2.6541831493377686, "learning_rate": 5.425652667423383e-06, "loss": 0.1005, "step": 3980 }, { "epoch": 2.686868686868687, "grad_norm": 2.7797365188598633, "learning_rate": 5.3121452894438146e-06, "loss": 0.0946, "step": 3990 }, { "epoch": 2.6936026936026938, "grad_norm": 1.8120551109313965, "learning_rate": 5.198637911464246e-06, "loss": 0.1003, "step": 4000 }, { "epoch": 2.6936026936026938, "eval_loss": 0.4680774211883545, "eval_runtime": 686.7326, "eval_samples_per_second": 2.065, "eval_steps_per_second": 0.13, "eval_wer": 0.46013934098566617, "step": 4000 }, { "epoch": 2.7003367003367003, "grad_norm": 2.1801674365997314, "learning_rate": 5.085130533484677e-06, "loss": 0.087, "step": 4010 }, { "epoch": 2.707070707070707, "grad_norm": 2.256625175476074, "learning_rate": 4.971623155505108e-06, "loss": 0.0806, "step": 4020 }, { "epoch": 
2.7138047138047137, "grad_norm": 2.6446785926818848, "learning_rate": 4.85811577752554e-06, "loss": 0.1, "step": 4030 }, { "epoch": 2.7205387205387206, "grad_norm": 1.7377904653549194, "learning_rate": 4.744608399545971e-06, "loss": 0.0923, "step": 4040 }, { "epoch": 2.7272727272727275, "grad_norm": 1.9209539890289307, "learning_rate": 4.631101021566402e-06, "loss": 0.0992, "step": 4050 }, { "epoch": 2.734006734006734, "grad_norm": 2.6267309188842773, "learning_rate": 4.517593643586833e-06, "loss": 0.0822, "step": 4060 }, { "epoch": 2.7407407407407405, "grad_norm": 1.8967944383621216, "learning_rate": 4.404086265607265e-06, "loss": 0.0807, "step": 4070 }, { "epoch": 2.7474747474747474, "grad_norm": 2.2259716987609863, "learning_rate": 4.2905788876276965e-06, "loss": 0.0794, "step": 4080 }, { "epoch": 2.7542087542087543, "grad_norm": 2.3132541179656982, "learning_rate": 4.177071509648128e-06, "loss": 0.0835, "step": 4090 }, { "epoch": 2.760942760942761, "grad_norm": 2.2206366062164307, "learning_rate": 4.063564131668559e-06, "loss": 0.0827, "step": 4100 }, { "epoch": 2.7676767676767677, "grad_norm": 2.6239089965820312, "learning_rate": 3.95005675368899e-06, "loss": 0.0946, "step": 4110 }, { "epoch": 2.774410774410774, "grad_norm": 2.0979490280151367, "learning_rate": 3.836549375709422e-06, "loss": 0.0682, "step": 4120 }, { "epoch": 2.781144781144781, "grad_norm": 2.2430787086486816, "learning_rate": 3.723041997729853e-06, "loss": 0.089, "step": 4130 }, { "epoch": 2.787878787878788, "grad_norm": 2.0071237087249756, "learning_rate": 3.6095346197502842e-06, "loss": 0.0892, "step": 4140 }, { "epoch": 2.7946127946127945, "grad_norm": 2.0898067951202393, "learning_rate": 3.4960272417707154e-06, "loss": 0.0879, "step": 4150 }, { "epoch": 2.8013468013468015, "grad_norm": 3.248400926589966, "learning_rate": 3.382519863791147e-06, "loss": 0.0928, "step": 4160 }, { "epoch": 2.808080808080808, "grad_norm": 2.3027138710021973, "learning_rate": 3.269012485811578e-06, "loss": 0.0876, "step": 4170 }, { "epoch": 2.814814814814815, "grad_norm": 2.523341417312622, "learning_rate": 3.1555051078320097e-06, "loss": 0.0902, "step": 4180 }, { "epoch": 2.821548821548822, "grad_norm": 1.5260744094848633, "learning_rate": 3.0419977298524404e-06, "loss": 0.0773, "step": 4190 }, { "epoch": 2.8282828282828283, "grad_norm": 1.5680999755859375, "learning_rate": 2.928490351872872e-06, "loss": 0.0862, "step": 4200 }, { "epoch": 2.8350168350168348, "grad_norm": 2.547013759613037, "learning_rate": 2.814982973893303e-06, "loss": 0.0972, "step": 4210 }, { "epoch": 2.8417508417508417, "grad_norm": 2.116196870803833, "learning_rate": 2.7014755959137347e-06, "loss": 0.0949, "step": 4220 }, { "epoch": 2.8484848484848486, "grad_norm": 2.7395036220550537, "learning_rate": 2.587968217934166e-06, "loss": 0.094, "step": 4230 }, { "epoch": 2.855218855218855, "grad_norm": 2.0705437660217285, "learning_rate": 2.4744608399545974e-06, "loss": 0.0991, "step": 4240 }, { "epoch": 2.861952861952862, "grad_norm": 3.2586395740509033, "learning_rate": 2.3609534619750285e-06, "loss": 0.0959, "step": 4250 }, { "epoch": 2.8686868686868685, "grad_norm": 2.0380172729492188, "learning_rate": 2.2474460839954596e-06, "loss": 0.091, "step": 4260 }, { "epoch": 2.8754208754208754, "grad_norm": 1.8960984945297241, "learning_rate": 2.1339387060158912e-06, "loss": 0.0867, "step": 4270 }, { "epoch": 2.8821548821548824, "grad_norm": 2.761885643005371, "learning_rate": 2.0204313280363224e-06, "loss": 0.0972, "step": 4280 }, { "epoch": 2.888888888888889, 
"grad_norm": 2.1381499767303467, "learning_rate": 1.9069239500567537e-06, "loss": 0.0929, "step": 4290 }, { "epoch": 2.8956228956228958, "grad_norm": 1.5396257638931274, "learning_rate": 1.793416572077185e-06, "loss": 0.088, "step": 4300 }, { "epoch": 2.9023569023569022, "grad_norm": 2.4630839824676514, "learning_rate": 1.6799091940976164e-06, "loss": 0.1068, "step": 4310 }, { "epoch": 2.909090909090909, "grad_norm": 1.8399533033370972, "learning_rate": 1.5664018161180478e-06, "loss": 0.0772, "step": 4320 }, { "epoch": 2.915824915824916, "grad_norm": 2.0763957500457764, "learning_rate": 1.4528944381384791e-06, "loss": 0.0885, "step": 4330 }, { "epoch": 2.9225589225589226, "grad_norm": 5.270694255828857, "learning_rate": 1.3393870601589105e-06, "loss": 0.0876, "step": 4340 }, { "epoch": 2.929292929292929, "grad_norm": 2.339585542678833, "learning_rate": 1.2258796821793418e-06, "loss": 0.0829, "step": 4350 }, { "epoch": 2.936026936026936, "grad_norm": 2.7977676391601562, "learning_rate": 1.112372304199773e-06, "loss": 0.1098, "step": 4360 }, { "epoch": 2.942760942760943, "grad_norm": 2.1591367721557617, "learning_rate": 9.988649262202043e-07, "loss": 0.0917, "step": 4370 }, { "epoch": 2.9494949494949494, "grad_norm": 2.4336767196655273, "learning_rate": 8.853575482406357e-07, "loss": 0.1013, "step": 4380 }, { "epoch": 2.9562289562289563, "grad_norm": 1.9739155769348145, "learning_rate": 7.718501702610669e-07, "loss": 0.0731, "step": 4390 }, { "epoch": 2.962962962962963, "grad_norm": 2.9581665992736816, "learning_rate": 6.583427922814983e-07, "loss": 0.1034, "step": 4400 }, { "epoch": 2.962962962962963, "eval_loss": 0.4650237560272217, "eval_runtime": 680.1256, "eval_samples_per_second": 2.085, "eval_steps_per_second": 0.131, "eval_wer": 0.4528582034149963, "step": 4400 } ], "logging_steps": 10, "max_steps": 4455, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.56588428967936e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }