{ "best_metric": null, "best_model_checkpoint": null, "epoch": 68.02721088435374, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3401360544217687, "grad_norm": 6.3686203956604, "learning_rate": 5.000000000000001e-07, "loss": 1.2352, "step": 25 }, { "epoch": 0.6802721088435374, "grad_norm": 4.156219959259033, "learning_rate": 1.0000000000000002e-06, "loss": 0.7799, "step": 50 }, { "epoch": 1.0204081632653061, "grad_norm": 4.332057952880859, "learning_rate": 1.5e-06, "loss": 0.5115, "step": 75 }, { "epoch": 1.3605442176870748, "grad_norm": 3.7332186698913574, "learning_rate": 2.0000000000000003e-06, "loss": 0.4105, "step": 100 }, { "epoch": 1.7006802721088436, "grad_norm": 3.6035523414611816, "learning_rate": 2.5e-06, "loss": 0.374, "step": 125 }, { "epoch": 2.0408163265306123, "grad_norm": 2.3493571281433105, "learning_rate": 3e-06, "loss": 0.3344, "step": 150 }, { "epoch": 2.380952380952381, "grad_norm": 3.0683202743530273, "learning_rate": 3.48e-06, "loss": 0.248, "step": 175 }, { "epoch": 2.7210884353741496, "grad_norm": 3.189012289047241, "learning_rate": 3.980000000000001e-06, "loss": 0.2395, "step": 200 }, { "epoch": 3.061224489795918, "grad_norm": 2.2101962566375732, "learning_rate": 4.48e-06, "loss": 0.2258, "step": 225 }, { "epoch": 3.4013605442176873, "grad_norm": 2.2124788761138916, "learning_rate": 4.980000000000001e-06, "loss": 0.1494, "step": 250 }, { "epoch": 3.741496598639456, "grad_norm": 3.446359395980835, "learning_rate": 5.480000000000001e-06, "loss": 0.1512, "step": 275 }, { "epoch": 4.081632653061225, "grad_norm": 2.592689037322998, "learning_rate": 5.98e-06, "loss": 0.1359, "step": 300 }, { "epoch": 4.421768707482993, "grad_norm": 3.1709280014038086, "learning_rate": 6.480000000000001e-06, "loss": 0.0859, "step": 325 }, { "epoch": 4.761904761904762, "grad_norm": 2.485269546508789, "learning_rate": 6.98e-06, "loss": 0.0958, "step": 350 }, { "epoch": 5.1020408163265305, "grad_norm": 1.868928074836731, "learning_rate": 7.48e-06, "loss": 0.0846, "step": 375 }, { "epoch": 5.442176870748299, "grad_norm": 3.283317804336548, "learning_rate": 7.980000000000002e-06, "loss": 0.0575, "step": 400 }, { "epoch": 5.782312925170068, "grad_norm": 2.18278431892395, "learning_rate": 8.48e-06, "loss": 0.075, "step": 425 }, { "epoch": 6.122448979591836, "grad_norm": 1.7689893245697021, "learning_rate": 8.98e-06, "loss": 0.0637, "step": 450 }, { "epoch": 6.462585034013605, "grad_norm": 2.60971736907959, "learning_rate": 9.48e-06, "loss": 0.0517, "step": 475 }, { "epoch": 6.802721088435375, "grad_norm": 1.903644323348999, "learning_rate": 9.980000000000001e-06, "loss": 0.0578, "step": 500 }, { "epoch": 7.142857142857143, "grad_norm": 1.4152497053146362, "learning_rate": 9.946666666666667e-06, "loss": 0.0565, "step": 525 }, { "epoch": 7.482993197278912, "grad_norm": 2.119438409805298, "learning_rate": 9.891111111111113e-06, "loss": 0.0436, "step": 550 }, { "epoch": 7.8231292517006805, "grad_norm": 1.8895118236541748, "learning_rate": 9.835555555555556e-06, "loss": 0.042, "step": 575 }, { "epoch": 8.16326530612245, "grad_norm": 1.7676234245300293, "learning_rate": 9.780000000000001e-06, "loss": 0.0351, "step": 600 }, { "epoch": 8.503401360544217, "grad_norm": 1.8845597505569458, "learning_rate": 9.724444444444445e-06, "loss": 0.0279, "step": 625 }, { "epoch": 8.843537414965986, "grad_norm": 9.495149612426758, "learning_rate": 9.66888888888889e-06, "loss": 0.031, "step": 650 }, { "epoch": 9.183673469387756, "grad_norm": 1.6925195455551147, "learning_rate": 9.613333333333335e-06, "loss": 0.0303, "step": 675 }, { "epoch": 9.523809523809524, "grad_norm": 1.4979898929595947, "learning_rate": 9.557777777777777e-06, "loss": 0.023, "step": 700 }, { "epoch": 9.863945578231293, "grad_norm": 2.2269773483276367, "learning_rate": 9.502222222222223e-06, "loss": 0.0261, "step": 725 }, { "epoch": 10.204081632653061, "grad_norm": 1.0259639024734497, "learning_rate": 9.446666666666667e-06, "loss": 0.0226, "step": 750 }, { "epoch": 10.54421768707483, "grad_norm": 1.9924999475479126, "learning_rate": 9.391111111111111e-06, "loss": 0.0205, "step": 775 }, { "epoch": 10.884353741496598, "grad_norm": 1.616970419883728, "learning_rate": 9.335555555555557e-06, "loss": 0.0199, "step": 800 }, { "epoch": 11.224489795918368, "grad_norm": 0.922492504119873, "learning_rate": 9.280000000000001e-06, "loss": 0.0161, "step": 825 }, { "epoch": 11.564625850340136, "grad_norm": 2.508662223815918, "learning_rate": 9.224444444444445e-06, "loss": 0.0145, "step": 850 }, { "epoch": 11.904761904761905, "grad_norm": 1.371565341949463, "learning_rate": 9.168888888888889e-06, "loss": 0.0179, "step": 875 }, { "epoch": 12.244897959183673, "grad_norm": 1.303175687789917, "learning_rate": 9.113333333333335e-06, "loss": 0.0155, "step": 900 }, { "epoch": 12.585034013605442, "grad_norm": 1.1102138757705688, "learning_rate": 9.057777777777779e-06, "loss": 0.012, "step": 925 }, { "epoch": 12.92517006802721, "grad_norm": 0.8504889011383057, "learning_rate": 9.002222222222223e-06, "loss": 0.0121, "step": 950 }, { "epoch": 13.26530612244898, "grad_norm": 0.8174204230308533, "learning_rate": 8.946666666666669e-06, "loss": 0.0106, "step": 975 }, { "epoch": 13.60544217687075, "grad_norm": 1.821559190750122, "learning_rate": 8.891111111111111e-06, "loss": 0.0112, "step": 1000 }, { "epoch": 13.60544217687075, "eval_loss": 0.39124733209609985, "eval_runtime": 93.6528, "eval_samples_per_second": 2.776, "eval_steps_per_second": 0.182, "eval_wer": 0.23946288698246923, "step": 1000 }, { "epoch": 13.945578231292517, "grad_norm": 1.2810653448104858, "learning_rate": 8.835555555555557e-06, "loss": 0.0111, "step": 1025 }, { "epoch": 14.285714285714286, "grad_norm": 1.2741467952728271, "learning_rate": 8.78e-06, "loss": 0.0097, "step": 1050 }, { "epoch": 14.625850340136054, "grad_norm": 0.8524342179298401, "learning_rate": 8.724444444444445e-06, "loss": 0.0076, "step": 1075 }, { "epoch": 14.965986394557824, "grad_norm": 1.643485426902771, "learning_rate": 8.66888888888889e-06, "loss": 0.0074, "step": 1100 }, { "epoch": 15.306122448979592, "grad_norm": 0.40055137872695923, "learning_rate": 8.613333333333333e-06, "loss": 0.007, "step": 1125 }, { "epoch": 15.646258503401361, "grad_norm": 1.1712241172790527, "learning_rate": 8.557777777777778e-06, "loss": 0.0072, "step": 1150 }, { "epoch": 15.986394557823129, "grad_norm": 0.32212740182876587, "learning_rate": 8.502222222222223e-06, "loss": 0.007, "step": 1175 }, { "epoch": 16.3265306122449, "grad_norm": 0.2166888266801834, "learning_rate": 8.446666666666668e-06, "loss": 0.0054, "step": 1200 }, { "epoch": 16.666666666666668, "grad_norm": 0.12256942689418793, "learning_rate": 8.391111111111112e-06, "loss": 0.0039, "step": 1225 }, { "epoch": 17.006802721088434, "grad_norm": 0.26391106843948364, "learning_rate": 8.335555555555556e-06, "loss": 0.0042, "step": 1250 }, { "epoch": 17.346938775510203, "grad_norm": 0.24293136596679688, "learning_rate": 8.28e-06, "loss": 0.0036, "step": 1275 }, { "epoch": 17.687074829931973, "grad_norm": 0.27556732296943665, "learning_rate": 8.224444444444444e-06, "loss": 0.0028, "step": 1300 }, { "epoch": 18.027210884353742, "grad_norm": 0.9470342397689819, "learning_rate": 8.16888888888889e-06, "loss": 0.0042, "step": 1325 }, { "epoch": 18.367346938775512, "grad_norm": 0.14824901521205902, "learning_rate": 8.113333333333334e-06, "loss": 0.0036, "step": 1350 }, { "epoch": 18.707482993197278, "grad_norm": 1.2378164529800415, "learning_rate": 8.057777777777778e-06, "loss": 0.0046, "step": 1375 }, { "epoch": 19.047619047619047, "grad_norm": 2.7857964038848877, "learning_rate": 8.002222222222222e-06, "loss": 0.004, "step": 1400 }, { "epoch": 19.387755102040817, "grad_norm": 0.5624294281005859, "learning_rate": 7.946666666666666e-06, "loss": 0.0073, "step": 1425 }, { "epoch": 19.727891156462587, "grad_norm": 0.18347227573394775, "learning_rate": 7.891111111111112e-06, "loss": 0.0058, "step": 1450 }, { "epoch": 20.068027210884352, "grad_norm": 0.3734131455421448, "learning_rate": 7.835555555555556e-06, "loss": 0.0066, "step": 1475 }, { "epoch": 20.408163265306122, "grad_norm": 0.6362162828445435, "learning_rate": 7.78e-06, "loss": 0.0075, "step": 1500 }, { "epoch": 20.74829931972789, "grad_norm": 0.8834488391876221, "learning_rate": 7.724444444444446e-06, "loss": 0.0057, "step": 1525 }, { "epoch": 21.08843537414966, "grad_norm": 0.06029968708753586, "learning_rate": 7.66888888888889e-06, "loss": 0.0038, "step": 1550 }, { "epoch": 21.428571428571427, "grad_norm": 1.0105019807815552, "learning_rate": 7.613333333333334e-06, "loss": 0.0039, "step": 1575 }, { "epoch": 21.768707482993197, "grad_norm": 0.5381556153297424, "learning_rate": 7.557777777777779e-06, "loss": 0.0036, "step": 1600 }, { "epoch": 22.108843537414966, "grad_norm": 0.08822619915008545, "learning_rate": 7.502222222222223e-06, "loss": 0.004, "step": 1625 }, { "epoch": 22.448979591836736, "grad_norm": 0.43402913212776184, "learning_rate": 7.446666666666668e-06, "loss": 0.0029, "step": 1650 }, { "epoch": 22.7891156462585, "grad_norm": 0.9147214293479919, "learning_rate": 7.3911111111111125e-06, "loss": 0.0024, "step": 1675 }, { "epoch": 23.12925170068027, "grad_norm": 0.48390820622444153, "learning_rate": 7.335555555555556e-06, "loss": 0.0036, "step": 1700 }, { "epoch": 23.46938775510204, "grad_norm": 0.10725089907646179, "learning_rate": 7.280000000000001e-06, "loss": 0.0023, "step": 1725 }, { "epoch": 23.80952380952381, "grad_norm": 0.09872180968523026, "learning_rate": 7.224444444444445e-06, "loss": 0.0018, "step": 1750 }, { "epoch": 24.14965986394558, "grad_norm": 0.6679806113243103, "learning_rate": 7.1688888888888895e-06, "loss": 0.0017, "step": 1775 }, { "epoch": 24.489795918367346, "grad_norm": 0.02364278770983219, "learning_rate": 7.113333333333334e-06, "loss": 0.001, "step": 1800 }, { "epoch": 24.829931972789115, "grad_norm": 0.02158285863697529, "learning_rate": 7.057777777777778e-06, "loss": 0.0008, "step": 1825 }, { "epoch": 25.170068027210885, "grad_norm": 0.014277754351496696, "learning_rate": 7.0022222222222225e-06, "loss": 0.0007, "step": 1850 }, { "epoch": 25.510204081632654, "grad_norm": 0.012241716496646404, "learning_rate": 6.946666666666667e-06, "loss": 0.0005, "step": 1875 }, { "epoch": 25.85034013605442, "grad_norm": 0.02822299115359783, "learning_rate": 6.891111111111111e-06, "loss": 0.0005, "step": 1900 }, { "epoch": 26.19047619047619, "grad_norm": 0.009908878244459629, "learning_rate": 6.835555555555556e-06, "loss": 0.0004, "step": 1925 }, { "epoch": 26.53061224489796, "grad_norm": 0.008494613692164421, "learning_rate": 6.780000000000001e-06, "loss": 0.0004, "step": 1950 }, { "epoch": 26.87074829931973, "grad_norm": 0.007728059310466051, "learning_rate": 6.724444444444444e-06, "loss": 0.0004, "step": 1975 }, { "epoch": 27.2108843537415, "grad_norm": 0.007557597942650318, "learning_rate": 6.668888888888889e-06, "loss": 0.0004, "step": 2000 }, { "epoch": 27.2108843537415, "eval_loss": 0.45324987173080444, "eval_runtime": 93.804, "eval_samples_per_second": 2.772, "eval_steps_per_second": 0.181, "eval_wer": 0.2245430809399478, "step": 2000 }, { "epoch": 27.551020408163264, "grad_norm": 0.009665679186582565, "learning_rate": 6.613333333333334e-06, "loss": 0.0004, "step": 2025 }, { "epoch": 27.891156462585034, "grad_norm": 0.006815009750425816, "learning_rate": 6.557777777777778e-06, "loss": 0.0004, "step": 2050 }, { "epoch": 28.231292517006803, "grad_norm": 0.007364605087786913, "learning_rate": 6.502222222222223e-06, "loss": 0.0003, "step": 2075 }, { "epoch": 28.571428571428573, "grad_norm": 0.006635705474764109, "learning_rate": 6.446666666666668e-06, "loss": 0.0003, "step": 2100 }, { "epoch": 28.91156462585034, "grad_norm": 0.008073186501860619, "learning_rate": 6.391111111111111e-06, "loss": 0.0003, "step": 2125 }, { "epoch": 29.25170068027211, "grad_norm": 0.006342068314552307, "learning_rate": 6.335555555555556e-06, "loss": 0.0003, "step": 2150 }, { "epoch": 29.591836734693878, "grad_norm": 0.006897253915667534, "learning_rate": 6.280000000000001e-06, "loss": 0.0003, "step": 2175 }, { "epoch": 29.931972789115648, "grad_norm": 0.006329766474664211, "learning_rate": 6.224444444444445e-06, "loss": 0.0003, "step": 2200 }, { "epoch": 30.272108843537413, "grad_norm": 0.006696599069982767, "learning_rate": 6.16888888888889e-06, "loss": 0.0003, "step": 2225 }, { "epoch": 30.612244897959183, "grad_norm": 0.0058494312688708305, "learning_rate": 6.113333333333333e-06, "loss": 0.0003, "step": 2250 }, { "epoch": 30.952380952380953, "grad_norm": 0.005851502064615488, "learning_rate": 6.057777777777778e-06, "loss": 0.0003, "step": 2275 }, { "epoch": 31.292517006802722, "grad_norm": 0.0047736396081745625, "learning_rate": 6.002222222222223e-06, "loss": 0.0003, "step": 2300 }, { "epoch": 31.632653061224488, "grad_norm": 0.006324047688394785, "learning_rate": 5.946666666666668e-06, "loss": 0.0003, "step": 2325 }, { "epoch": 31.972789115646258, "grad_norm": 0.005418767221271992, "learning_rate": 5.891111111111112e-06, "loss": 0.0003, "step": 2350 }, { "epoch": 32.31292517006803, "grad_norm": 0.005563849117606878, "learning_rate": 5.8355555555555565e-06, "loss": 0.0003, "step": 2375 }, { "epoch": 32.6530612244898, "grad_norm": 0.005108444020152092, "learning_rate": 5.78e-06, "loss": 0.0002, "step": 2400 }, { "epoch": 32.993197278911566, "grad_norm": 0.004787669517099857, "learning_rate": 5.724444444444445e-06, "loss": 0.0003, "step": 2425 }, { "epoch": 33.333333333333336, "grad_norm": 0.004051292315125465, "learning_rate": 5.6688888888888895e-06, "loss": 0.0002, "step": 2450 }, { "epoch": 33.673469387755105, "grad_norm": 0.005220952443778515, "learning_rate": 5.613333333333334e-06, "loss": 0.0002, "step": 2475 }, { "epoch": 34.01360544217687, "grad_norm": 0.0054339151829481125, "learning_rate": 5.557777777777778e-06, "loss": 0.0002, "step": 2500 }, { "epoch": 34.35374149659864, "grad_norm": 0.004454713314771652, "learning_rate": 5.5022222222222224e-06, "loss": 0.0002, "step": 2525 }, { "epoch": 34.69387755102041, "grad_norm": 0.005186771042644978, "learning_rate": 5.4466666666666665e-06, "loss": 0.0002, "step": 2550 }, { "epoch": 35.034013605442176, "grad_norm": 0.004502983298152685, "learning_rate": 5.391111111111111e-06, "loss": 0.0002, "step": 2575 }, { "epoch": 35.374149659863946, "grad_norm": 0.004623442888259888, "learning_rate": 5.335555555555556e-06, "loss": 0.0002, "step": 2600 }, { "epoch": 35.714285714285715, "grad_norm": 0.00428406847640872, "learning_rate": 5.28e-06, "loss": 0.0002, "step": 2625 }, { "epoch": 36.054421768707485, "grad_norm": 0.004207184072583914, "learning_rate": 5.224444444444445e-06, "loss": 0.0002, "step": 2650 }, { "epoch": 36.394557823129254, "grad_norm": 0.004264296032488346, "learning_rate": 5.168888888888889e-06, "loss": 0.0002, "step": 2675 }, { "epoch": 36.734693877551024, "grad_norm": 0.0045384918339550495, "learning_rate": 5.113333333333333e-06, "loss": 0.0002, "step": 2700 }, { "epoch": 37.074829931972786, "grad_norm": 0.0036523097660392523, "learning_rate": 5.057777777777778e-06, "loss": 0.0002, "step": 2725 }, { "epoch": 37.414965986394556, "grad_norm": 0.003838042262941599, "learning_rate": 5.002222222222223e-06, "loss": 0.0002, "step": 2750 }, { "epoch": 37.755102040816325, "grad_norm": 0.0043487842194736, "learning_rate": 4.946666666666667e-06, "loss": 0.0002, "step": 2775 }, { "epoch": 38.095238095238095, "grad_norm": 0.004179787822067738, "learning_rate": 4.891111111111111e-06, "loss": 0.0002, "step": 2800 }, { "epoch": 38.435374149659864, "grad_norm": 0.0036503339651972055, "learning_rate": 4.835555555555556e-06, "loss": 0.0002, "step": 2825 }, { "epoch": 38.775510204081634, "grad_norm": 0.0033976498525589705, "learning_rate": 4.78e-06, "loss": 0.0002, "step": 2850 }, { "epoch": 39.1156462585034, "grad_norm": 0.0038732371758669615, "learning_rate": 4.724444444444445e-06, "loss": 0.0002, "step": 2875 }, { "epoch": 39.45578231292517, "grad_norm": 0.003690896322950721, "learning_rate": 4.66888888888889e-06, "loss": 0.0002, "step": 2900 }, { "epoch": 39.795918367346935, "grad_norm": 0.005354354623705149, "learning_rate": 4.613333333333334e-06, "loss": 0.0002, "step": 2925 }, { "epoch": 40.136054421768705, "grad_norm": 0.0036710058338940144, "learning_rate": 4.557777777777778e-06, "loss": 0.0002, "step": 2950 }, { "epoch": 40.476190476190474, "grad_norm": 0.005290627479553223, "learning_rate": 4.502222222222223e-06, "loss": 0.0002, "step": 2975 }, { "epoch": 40.816326530612244, "grad_norm": 0.003753775032237172, "learning_rate": 4.446666666666667e-06, "loss": 0.0002, "step": 3000 }, { "epoch": 40.816326530612244, "eval_loss": 0.4882185459136963, "eval_runtime": 93.7044, "eval_samples_per_second": 2.775, "eval_steps_per_second": 0.181, "eval_wer": 0.2174561730697501, "step": 3000 }, { "epoch": 41.156462585034014, "grad_norm": 0.004405771382153034, "learning_rate": 4.391111111111112e-06, "loss": 0.0002, "step": 3025 }, { "epoch": 41.49659863945578, "grad_norm": 0.0036535647232085466, "learning_rate": 4.3355555555555565e-06, "loss": 0.0002, "step": 3050 }, { "epoch": 41.83673469387755, "grad_norm": 0.0036972814705222845, "learning_rate": 4.2800000000000005e-06, "loss": 0.0002, "step": 3075 }, { "epoch": 42.17687074829932, "grad_norm": 0.004110525827854872, "learning_rate": 4.2244444444444446e-06, "loss": 0.0002, "step": 3100 }, { "epoch": 42.51700680272109, "grad_norm": 0.0035640313290059566, "learning_rate": 4.168888888888889e-06, "loss": 0.0002, "step": 3125 }, { "epoch": 42.857142857142854, "grad_norm": 0.004424062091857195, "learning_rate": 4.1133333333333335e-06, "loss": 0.0002, "step": 3150 }, { "epoch": 43.197278911564624, "grad_norm": 0.0032335869036614895, "learning_rate": 4.057777777777778e-06, "loss": 0.0002, "step": 3175 }, { "epoch": 43.53741496598639, "grad_norm": 0.0037836297415196896, "learning_rate": 4.002222222222222e-06, "loss": 0.0002, "step": 3200 }, { "epoch": 43.87755102040816, "grad_norm": 0.003560603130608797, "learning_rate": 3.946666666666667e-06, "loss": 0.0002, "step": 3225 }, { "epoch": 44.21768707482993, "grad_norm": 0.003510043490678072, "learning_rate": 3.891111111111111e-06, "loss": 0.0002, "step": 3250 }, { "epoch": 44.5578231292517, "grad_norm": 0.0028691268526017666, "learning_rate": 3.835555555555555e-06, "loss": 0.0002, "step": 3275 }, { "epoch": 44.89795918367347, "grad_norm": 0.0031337698455899954, "learning_rate": 3.7800000000000002e-06, "loss": 0.0001, "step": 3300 }, { "epoch": 45.23809523809524, "grad_norm": 0.00317736086435616, "learning_rate": 3.724444444444445e-06, "loss": 0.0001, "step": 3325 }, { "epoch": 45.578231292517, "grad_norm": 0.0029643489979207516, "learning_rate": 3.668888888888889e-06, "loss": 0.0002, "step": 3350 }, { "epoch": 45.91836734693877, "grad_norm": 0.003078688168898225, "learning_rate": 3.6133333333333336e-06, "loss": 0.0001, "step": 3375 }, { "epoch": 46.25850340136054, "grad_norm": 0.003043568693101406, "learning_rate": 3.5577777777777785e-06, "loss": 0.0001, "step": 3400 }, { "epoch": 46.59863945578231, "grad_norm": 0.003218689002096653, "learning_rate": 3.5022222222222225e-06, "loss": 0.0001, "step": 3425 }, { "epoch": 46.93877551020408, "grad_norm": 0.003266324056312442, "learning_rate": 3.446666666666667e-06, "loss": 0.0001, "step": 3450 }, { "epoch": 47.27891156462585, "grad_norm": 0.003477707039564848, "learning_rate": 3.391111111111111e-06, "loss": 0.0001, "step": 3475 }, { "epoch": 47.61904761904762, "grad_norm": 0.0027373475022614002, "learning_rate": 3.335555555555556e-06, "loss": 0.0001, "step": 3500 }, { "epoch": 47.95918367346939, "grad_norm": 0.002786448458209634, "learning_rate": 3.2800000000000004e-06, "loss": 0.0001, "step": 3525 }, { "epoch": 48.29931972789116, "grad_norm": 0.002394324168562889, "learning_rate": 3.2244444444444444e-06, "loss": 0.0001, "step": 3550 }, { "epoch": 48.63945578231292, "grad_norm": 0.003250208217650652, "learning_rate": 3.1688888888888893e-06, "loss": 0.0001, "step": 3575 }, { "epoch": 48.97959183673469, "grad_norm": 0.0029996377415955067, "learning_rate": 3.1133333333333337e-06, "loss": 0.0001, "step": 3600 }, { "epoch": 49.31972789115646, "grad_norm": 0.0026746434159576893, "learning_rate": 3.0577777777777778e-06, "loss": 0.0001, "step": 3625 }, { "epoch": 49.65986394557823, "grad_norm": 0.00262379739433527, "learning_rate": 3.0022222222222227e-06, "loss": 0.0001, "step": 3650 }, { "epoch": 50.0, "grad_norm": 0.0029098980594426394, "learning_rate": 2.946666666666667e-06, "loss": 0.0001, "step": 3675 }, { "epoch": 50.34013605442177, "grad_norm": 0.002616139827296138, "learning_rate": 2.891111111111111e-06, "loss": 0.0001, "step": 3700 }, { "epoch": 50.68027210884354, "grad_norm": 0.0029571950435638428, "learning_rate": 2.835555555555556e-06, "loss": 0.0001, "step": 3725 }, { "epoch": 51.02040816326531, "grad_norm": 0.0027916007675230503, "learning_rate": 2.7800000000000005e-06, "loss": 0.0001, "step": 3750 }, { "epoch": 51.36054421768708, "grad_norm": 0.002735557733103633, "learning_rate": 2.7244444444444445e-06, "loss": 0.0001, "step": 3775 }, { "epoch": 51.70068027210884, "grad_norm": 0.0023191324435174465, "learning_rate": 2.6688888888888894e-06, "loss": 0.0001, "step": 3800 }, { "epoch": 52.04081632653061, "grad_norm": 0.0034847650676965714, "learning_rate": 2.6133333333333334e-06, "loss": 0.0001, "step": 3825 }, { "epoch": 52.38095238095238, "grad_norm": 0.002770556602627039, "learning_rate": 2.557777777777778e-06, "loss": 0.0001, "step": 3850 }, { "epoch": 52.72108843537415, "grad_norm": 0.0030505817849189043, "learning_rate": 2.5022222222222224e-06, "loss": 0.0001, "step": 3875 }, { "epoch": 53.06122448979592, "grad_norm": 0.003404865274205804, "learning_rate": 2.446666666666667e-06, "loss": 0.0001, "step": 3900 }, { "epoch": 53.40136054421769, "grad_norm": 0.0026544102001935244, "learning_rate": 2.3911111111111113e-06, "loss": 0.0001, "step": 3925 }, { "epoch": 53.74149659863946, "grad_norm": 0.00271439622156322, "learning_rate": 2.3355555555555557e-06, "loss": 0.0001, "step": 3950 }, { "epoch": 54.08163265306123, "grad_norm": 0.0033124638721346855, "learning_rate": 2.28e-06, "loss": 0.0001, "step": 3975 }, { "epoch": 54.421768707483, "grad_norm": 0.0025922644417732954, "learning_rate": 2.2244444444444447e-06, "loss": 0.0001, "step": 4000 }, { "epoch": 54.421768707483, "eval_loss": 0.5051469206809998, "eval_runtime": 95.0455, "eval_samples_per_second": 2.736, "eval_steps_per_second": 0.179, "eval_wer": 0.21484520701230883, "step": 4000 }, { "epoch": 54.76190476190476, "grad_norm": 0.0020597511902451515, "learning_rate": 2.168888888888889e-06, "loss": 0.0001, "step": 4025 }, { "epoch": 55.10204081632653, "grad_norm": 0.002817349275574088, "learning_rate": 2.1133333333333336e-06, "loss": 0.0001, "step": 4050 }, { "epoch": 55.4421768707483, "grad_norm": 0.003287636674940586, "learning_rate": 2.057777777777778e-06, "loss": 0.0001, "step": 4075 }, { "epoch": 55.78231292517007, "grad_norm": 0.00247744913212955, "learning_rate": 2.0022222222222225e-06, "loss": 0.0001, "step": 4100 }, { "epoch": 56.12244897959184, "grad_norm": 0.003431103890761733, "learning_rate": 1.9466666666666665e-06, "loss": 0.0001, "step": 4125 }, { "epoch": 56.46258503401361, "grad_norm": 0.0024367747828364372, "learning_rate": 1.8911111111111114e-06, "loss": 0.0001, "step": 4150 }, { "epoch": 56.802721088435376, "grad_norm": 0.0022823926992714405, "learning_rate": 1.8355555555555557e-06, "loss": 0.0001, "step": 4175 }, { "epoch": 57.142857142857146, "grad_norm": 0.0022000963799655437, "learning_rate": 1.7800000000000001e-06, "loss": 0.0001, "step": 4200 }, { "epoch": 57.48299319727891, "grad_norm": 0.0023311020340770483, "learning_rate": 1.7244444444444448e-06, "loss": 0.0001, "step": 4225 }, { "epoch": 57.82312925170068, "grad_norm": 0.002466644160449505, "learning_rate": 1.668888888888889e-06, "loss": 0.0001, "step": 4250 }, { "epoch": 58.16326530612245, "grad_norm": 0.0023317814338952303, "learning_rate": 1.6133333333333335e-06, "loss": 0.0001, "step": 4275 }, { "epoch": 58.50340136054422, "grad_norm": 0.0034895280841737986, "learning_rate": 1.5577777777777777e-06, "loss": 0.0001, "step": 4300 }, { "epoch": 58.843537414965986, "grad_norm": 0.002141441684216261, "learning_rate": 1.5022222222222224e-06, "loss": 0.0001, "step": 4325 }, { "epoch": 59.183673469387756, "grad_norm": 0.0023929886519908905, "learning_rate": 1.4466666666666669e-06, "loss": 0.0001, "step": 4350 }, { "epoch": 59.523809523809526, "grad_norm": 0.002914367476478219, "learning_rate": 1.3911111111111111e-06, "loss": 0.0001, "step": 4375 }, { "epoch": 59.863945578231295, "grad_norm": 0.0023239688016474247, "learning_rate": 1.3355555555555558e-06, "loss": 0.0001, "step": 4400 }, { "epoch": 60.204081632653065, "grad_norm": 0.00241728313267231, "learning_rate": 1.28e-06, "loss": 0.0001, "step": 4425 }, { "epoch": 60.54421768707483, "grad_norm": 0.0032376388553529978, "learning_rate": 1.2244444444444445e-06, "loss": 0.0001, "step": 4450 }, { "epoch": 60.8843537414966, "grad_norm": 0.003632117761299014, "learning_rate": 1.168888888888889e-06, "loss": 0.0001, "step": 4475 }, { "epoch": 61.224489795918366, "grad_norm": 0.002522936789318919, "learning_rate": 1.1133333333333334e-06, "loss": 0.0001, "step": 4500 }, { "epoch": 61.564625850340136, "grad_norm": 0.002181953750550747, "learning_rate": 1.0577777777777779e-06, "loss": 0.0001, "step": 4525 }, { "epoch": 61.904761904761905, "grad_norm": 0.0020987866446375847, "learning_rate": 1.0022222222222223e-06, "loss": 0.0001, "step": 4550 }, { "epoch": 62.244897959183675, "grad_norm": 0.002102503553032875, "learning_rate": 9.466666666666667e-07, "loss": 0.0001, "step": 4575 }, { "epoch": 62.585034013605444, "grad_norm": 0.0019837727304548025, "learning_rate": 8.911111111111112e-07, "loss": 0.0001, "step": 4600 }, { "epoch": 62.925170068027214, "grad_norm": 0.002303441520780325, "learning_rate": 8.355555555555556e-07, "loss": 0.0001, "step": 4625 }, { "epoch": 63.265306122448976, "grad_norm": 0.007395027671009302, "learning_rate": 7.8e-07, "loss": 0.0001, "step": 4650 }, { "epoch": 63.605442176870746, "grad_norm": 0.002733208704739809, "learning_rate": 7.244444444444446e-07, "loss": 0.0001, "step": 4675 }, { "epoch": 63.945578231292515, "grad_norm": 0.0020845523104071617, "learning_rate": 6.68888888888889e-07, "loss": 0.0001, "step": 4700 }, { "epoch": 64.28571428571429, "grad_norm": 0.0019409642554819584, "learning_rate": 6.133333333333333e-07, "loss": 0.0001, "step": 4725 }, { "epoch": 64.62585034013605, "grad_norm": 0.00258248602040112, "learning_rate": 5.577777777777779e-07, "loss": 0.0001, "step": 4750 }, { "epoch": 64.96598639455782, "grad_norm": 0.0025006316136568785, "learning_rate": 5.022222222222222e-07, "loss": 0.0001, "step": 4775 }, { "epoch": 65.3061224489796, "grad_norm": 0.0022064538206905127, "learning_rate": 4.466666666666667e-07, "loss": 0.0001, "step": 4800 }, { "epoch": 65.64625850340136, "grad_norm": 0.002108414890244603, "learning_rate": 3.9111111111111115e-07, "loss": 0.0001, "step": 4825 }, { "epoch": 65.98639455782313, "grad_norm": 0.0021663971710950136, "learning_rate": 3.3555555555555556e-07, "loss": 0.0001, "step": 4850 }, { "epoch": 66.3265306122449, "grad_norm": 0.00204038736410439, "learning_rate": 2.8e-07, "loss": 0.0001, "step": 4875 }, { "epoch": 66.66666666666667, "grad_norm": 0.0022622975520789623, "learning_rate": 2.2444444444444445e-07, "loss": 0.0001, "step": 4900 }, { "epoch": 67.00680272108843, "grad_norm": 0.0033368293661624193, "learning_rate": 1.6888888888888888e-07, "loss": 0.0001, "step": 4925 }, { "epoch": 67.34693877551021, "grad_norm": 0.0019737225957214832, "learning_rate": 1.1333333333333336e-07, "loss": 0.0001, "step": 4950 }, { "epoch": 67.68707482993197, "grad_norm": 0.0019130747532472014, "learning_rate": 5.777777777777778e-08, "loss": 0.0001, "step": 4975 }, { "epoch": 68.02721088435374, "grad_norm": 0.002000050852075219, "learning_rate": 2.2222222222222225e-09, "loss": 0.0001, "step": 5000 }, { "epoch": 68.02721088435374, "eval_loss": 0.5118595957756042, "eval_runtime": 95.0278, "eval_samples_per_second": 2.736, "eval_steps_per_second": 0.179, "eval_wer": 0.21671018276762402, "step": 5000 }, { "epoch": 68.02721088435374, "step": 5000, "total_flos": 3.378304801456128e+20, "train_loss": 0.03018118931162171, "train_runtime": 39486.7724, "train_samples_per_second": 4.052, "train_steps_per_second": 0.127 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 69, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.378304801456128e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }