{ "best_metric": 0.38558459281921387, "best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-roma/checkpoint-10300", "epoch": 20.0, "eval_steps": 100, "global_step": 10340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19342359767891681, "eval_cer": 3.3906697851852106, "eval_loss": 16.271892547607422, "eval_runtime": 235.1592, "eval_samples_per_second": 23.214, "eval_steps_per_second": 2.904, "eval_wer": 3.0917750503755266, "step": 100 }, { "epoch": 0.38684719535783363, "eval_cer": 2.893664453340365, "eval_loss": 16.005735397338867, "eval_runtime": 236.1908, "eval_samples_per_second": 23.113, "eval_steps_per_second": 2.892, "eval_wer": 2.8734200403004215, "step": 200 }, { "epoch": 0.5802707930367504, "eval_cer": 1.690616021560241, "eval_loss": 15.374765396118164, "eval_runtime": 231.2146, "eval_samples_per_second": 23.61, "eval_steps_per_second": 2.954, "eval_wer": 1.9646455394760944, "step": 300 }, { "epoch": 0.7736943907156673, "eval_cer": 0.9291963879062664, "eval_loss": 13.043855667114258, "eval_runtime": 226.4398, "eval_samples_per_second": 24.108, "eval_steps_per_second": 3.016, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.9671179883945842, "grad_norm": 47.46343994140625, "learning_rate": 1.1904e-06, "loss": 11.8785, "step": 500 }, { "epoch": 0.9671179883945842, "eval_cer": 0.929223783383958, "eval_loss": 7.6484761238098145, "eval_runtime": 226.1704, "eval_samples_per_second": 24.137, "eval_steps_per_second": 3.02, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.1605415860735009, "eval_cer": 0.929223783383958, "eval_loss": 6.007350444793701, "eval_runtime": 228.9926, "eval_samples_per_second": 23.839, "eval_steps_per_second": 2.983, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.3539651837524178, "eval_cer": 0.929223783383958, "eval_loss": 5.646536827087402, "eval_runtime": 323.7978, "eval_samples_per_second": 16.859, "eval_steps_per_second": 2.109, "eval_wer": 1.0, "step": 700 }, { "epoch": 1.5473887814313345, "eval_cer": 0.929223783383958, "eval_loss": 5.502617359161377, "eval_runtime": 258.5966, "eval_samples_per_second": 21.11, "eval_steps_per_second": 2.641, "eval_wer": 1.0, "step": 800 }, { "epoch": 1.7408123791102514, "eval_cer": 0.929223783383958, "eval_loss": 5.361776351928711, "eval_runtime": 256.6054, "eval_samples_per_second": 21.274, "eval_steps_per_second": 2.662, "eval_wer": 1.0, "step": 900 }, { "epoch": 1.9342359767891684, "grad_norm": 29.7659969329834, "learning_rate": 2.3904e-06, "loss": 4.9912, "step": 1000 }, { "epoch": 1.9342359767891684, "eval_cer": 0.929223783383958, "eval_loss": 5.219295024871826, "eval_runtime": 262.3733, "eval_samples_per_second": 20.806, "eval_steps_per_second": 2.603, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.127659574468085, "eval_cer": 0.929223783383958, "eval_loss": 5.06879997253418, "eval_runtime": 246.2278, "eval_samples_per_second": 22.171, "eval_steps_per_second": 2.774, "eval_wer": 1.0, "step": 1100 }, { "epoch": 2.3210831721470018, "eval_cer": 0.929223783383958, "eval_loss": 4.91414737701416, "eval_runtime": 265.8995, "eval_samples_per_second": 20.53, "eval_steps_per_second": 2.569, "eval_wer": 1.0, "step": 1200 }, { "epoch": 2.514506769825919, "eval_cer": 0.929223783383958, "eval_loss": 4.753695964813232, "eval_runtime": 261.3741, "eval_samples_per_second": 20.886, "eval_steps_per_second": 2.613, "eval_wer": 1.0, "step": 1300 }, { "epoch": 2.7079303675048356, "eval_cer": 0.929223783383958, "eval_loss": 4.589428901672363, "eval_runtime": 262.0559, "eval_samples_per_second": 20.831, "eval_steps_per_second": 2.606, "eval_wer": 1.0, "step": 1400 }, { "epoch": 2.9013539651837523, "grad_norm": 19.784141540527344, "learning_rate": 3.5904e-06, "loss": 4.3024, "step": 1500 }, { "epoch": 2.9013539651837523, "eval_cer": 0.929223783383958, "eval_loss": 4.4225029945373535, "eval_runtime": 247.2412, "eval_samples_per_second": 22.08, "eval_steps_per_second": 2.762, "eval_wer": 1.0, "step": 1500 }, { "epoch": 3.094777562862669, "eval_cer": 0.929223783383958, "eval_loss": 4.2562456130981445, "eval_runtime": 265.0317, "eval_samples_per_second": 20.598, "eval_steps_per_second": 2.577, "eval_wer": 1.0, "step": 1600 }, { "epoch": 3.288201160541586, "eval_cer": 0.929223783383958, "eval_loss": 4.094423770904541, "eval_runtime": 262.7201, "eval_samples_per_second": 20.779, "eval_steps_per_second": 2.6, "eval_wer": 1.0, "step": 1700 }, { "epoch": 3.481624758220503, "eval_cer": 0.929223783383958, "eval_loss": 3.9344263076782227, "eval_runtime": 233.1037, "eval_samples_per_second": 23.419, "eval_steps_per_second": 2.93, "eval_wer": 1.0, "step": 1800 }, { "epoch": 3.6750483558994196, "eval_cer": 0.929223783383958, "eval_loss": 3.7835137844085693, "eval_runtime": 231.2092, "eval_samples_per_second": 23.611, "eval_steps_per_second": 2.954, "eval_wer": 1.0, "step": 1900 }, { "epoch": 3.8684719535783367, "grad_norm": 19.985584259033203, "learning_rate": 4.7904e-06, "loss": 3.6966, "step": 2000 }, { "epoch": 3.8684719535783367, "eval_cer": 0.929223783383958, "eval_loss": 3.641073703765869, "eval_runtime": 230.0297, "eval_samples_per_second": 23.732, "eval_steps_per_second": 2.969, "eval_wer": 1.0, "step": 2000 }, { "epoch": 4.061895551257253, "eval_cer": 0.929223783383958, "eval_loss": 3.5155856609344482, "eval_runtime": 229.2143, "eval_samples_per_second": 23.816, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 2100 }, { "epoch": 4.25531914893617, "eval_cer": 0.929223783383958, "eval_loss": 3.397294044494629, "eval_runtime": 229.4391, "eval_samples_per_second": 23.793, "eval_steps_per_second": 2.977, "eval_wer": 1.0, "step": 2200 }, { "epoch": 4.448742746615087, "eval_cer": 0.929223783383958, "eval_loss": 3.290855646133423, "eval_runtime": 231.2121, "eval_samples_per_second": 23.61, "eval_steps_per_second": 2.954, "eval_wer": 1.0, "step": 2300 }, { "epoch": 4.6421663442940035, "eval_cer": 0.929223783383958, "eval_loss": 3.1956613063812256, "eval_runtime": 230.2419, "eval_samples_per_second": 23.71, "eval_steps_per_second": 2.966, "eval_wer": 1.0, "step": 2400 }, { "epoch": 4.835589941972921, "grad_norm": 3.1049797534942627, "learning_rate": 5.9904e-06, "loss": 3.2011, "step": 2500 }, { "epoch": 4.835589941972921, "eval_cer": 0.929223783383958, "eval_loss": 3.115924596786499, "eval_runtime": 231.315, "eval_samples_per_second": 23.6, "eval_steps_per_second": 2.953, "eval_wer": 1.0, "step": 2500 }, { "epoch": 5.029013539651838, "eval_cer": 0.929223783383958, "eval_loss": 3.0544307231903076, "eval_runtime": 231.5463, "eval_samples_per_second": 23.576, "eval_steps_per_second": 2.95, "eval_wer": 1.0, "step": 2600 }, { "epoch": 5.222437137330754, "eval_cer": 0.929223783383958, "eval_loss": 3.0038533210754395, "eval_runtime": 237.992, "eval_samples_per_second": 22.938, "eval_steps_per_second": 2.87, "eval_wer": 1.0, "step": 2700 }, { "epoch": 5.415860735009671, "eval_cer": 0.929223783383958, "eval_loss": 2.9653820991516113, "eval_runtime": 231.2651, "eval_samples_per_second": 23.605, "eval_steps_per_second": 2.953, "eval_wer": 1.0, "step": 2800 }, { "epoch": 5.609284332688588, "eval_cer": 0.929223783383958, "eval_loss": 2.9386982917785645, "eval_runtime": 235.1119, "eval_samples_per_second": 23.219, "eval_steps_per_second": 2.905, "eval_wer": 1.0, "step": 2900 }, { "epoch": 5.802707930367505, "grad_norm": 0.9443885684013367, "learning_rate": 7.190400000000001e-06, "loss": 2.9439, "step": 3000 }, { "epoch": 5.802707930367505, "eval_cer": 0.929223783383958, "eval_loss": 2.9090564250946045, "eval_runtime": 234.7068, "eval_samples_per_second": 23.259, "eval_steps_per_second": 2.91, "eval_wer": 1.0, "step": 3000 }, { "epoch": 5.996131528046422, "eval_cer": 0.929223783383958, "eval_loss": 2.8868346214294434, "eval_runtime": 233.0298, "eval_samples_per_second": 23.426, "eval_steps_per_second": 2.931, "eval_wer": 1.0, "step": 3100 }, { "epoch": 6.189555125725338, "eval_cer": 0.929223783383958, "eval_loss": 2.8660459518432617, "eval_runtime": 237.6658, "eval_samples_per_second": 22.969, "eval_steps_per_second": 2.874, "eval_wer": 1.0, "step": 3200 }, { "epoch": 6.382978723404255, "eval_cer": 0.929223783383958, "eval_loss": 2.853316068649292, "eval_runtime": 238.1324, "eval_samples_per_second": 22.924, "eval_steps_per_second": 2.868, "eval_wer": 1.0, "step": 3300 }, { "epoch": 6.576402321083172, "eval_cer": 0.929223783383958, "eval_loss": 2.7336666584014893, "eval_runtime": 248.0434, "eval_samples_per_second": 22.008, "eval_steps_per_second": 2.754, "eval_wer": 1.0, "step": 3400 }, { "epoch": 6.769825918762089, "grad_norm": 5.135930061340332, "learning_rate": 8.3904e-06, "loss": 2.7884, "step": 3500 }, { "epoch": 6.769825918762089, "eval_cer": 0.929223783383958, "eval_loss": 2.523024559020996, "eval_runtime": 234.9913, "eval_samples_per_second": 23.231, "eval_steps_per_second": 2.906, "eval_wer": 1.0, "step": 3500 }, { "epoch": 6.963249516441006, "eval_cer": 0.9181765570048525, "eval_loss": 2.272446870803833, "eval_runtime": 233.6368, "eval_samples_per_second": 23.365, "eval_steps_per_second": 2.923, "eval_wer": 1.0, "step": 3600 }, { "epoch": 7.156673114119923, "eval_cer": 0.6315890404391495, "eval_loss": 1.9632701873779297, "eval_runtime": 237.5728, "eval_samples_per_second": 22.978, "eval_steps_per_second": 2.875, "eval_wer": 1.0, "step": 3700 }, { "epoch": 7.350096711798839, "eval_cer": 0.4242155476184769, "eval_loss": 1.5858280658721924, "eval_runtime": 277.2902, "eval_samples_per_second": 19.687, "eval_steps_per_second": 2.463, "eval_wer": 1.0, "step": 3800 }, { "epoch": 7.543520309477756, "eval_cer": 0.3861015892801496, "eval_loss": 1.351006031036377, "eval_runtime": 236.5551, "eval_samples_per_second": 23.077, "eval_steps_per_second": 2.887, "eval_wer": 0.9998168162667155, "step": 3900 }, { "epoch": 7.7369439071566735, "grad_norm": 3.2181670665740967, "learning_rate": 9.5904e-06, "loss": 1.7651, "step": 4000 }, { "epoch": 7.7369439071566735, "eval_cer": 0.33344405672233657, "eval_loss": 1.1917191743850708, "eval_runtime": 237.0965, "eval_samples_per_second": 23.024, "eval_steps_per_second": 2.881, "eval_wer": 0.999267265066862, "step": 4000 }, { "epoch": 7.93036750483559, "eval_cer": 0.29824771675815615, "eval_loss": 1.0715813636779785, "eval_runtime": 240.0983, "eval_samples_per_second": 22.737, "eval_steps_per_second": 2.845, "eval_wer": 0.9979849789338707, "step": 4100 }, { "epoch": 8.123791102514506, "eval_cer": 0.27819422708796343, "eval_loss": 0.9761540293693542, "eval_runtime": 238.5129, "eval_samples_per_second": 22.888, "eval_steps_per_second": 2.864, "eval_wer": 0.9976186114673017, "step": 4200 }, { "epoch": 8.317214700193423, "eval_cer": 0.2595721511271527, "eval_loss": 0.904407799243927, "eval_runtime": 234.8451, "eval_samples_per_second": 23.245, "eval_steps_per_second": 2.908, "eval_wer": 0.9965195090675948, "step": 4300 }, { "epoch": 8.51063829787234, "eval_cer": 0.2565654974505084, "eval_loss": 0.8529276251792908, "eval_runtime": 237.5826, "eval_samples_per_second": 22.977, "eval_steps_per_second": 2.875, "eval_wer": 0.9963363253343103, "step": 4400 }, { "epoch": 8.704061895551257, "grad_norm": 2.711160898208618, "learning_rate": 1.0790400000000001e-05, "loss": 0.9278, "step": 4500 }, { "epoch": 8.704061895551257, "eval_cer": 0.2466140901790637, "eval_loss": 0.7957596182823181, "eval_runtime": 234.6716, "eval_samples_per_second": 23.262, "eval_steps_per_second": 2.91, "eval_wer": 0.9970690602674482, "step": 4500 }, { "epoch": 8.897485493230175, "eval_cer": 0.24351497676521047, "eval_loss": 0.7534636855125427, "eval_runtime": 235.3455, "eval_samples_per_second": 23.196, "eval_steps_per_second": 2.902, "eval_wer": 0.9965195090675948, "step": 4600 }, { "epoch": 9.090909090909092, "eval_cer": 0.24028573483232255, "eval_loss": 0.7190229296684265, "eval_runtime": 235.8068, "eval_samples_per_second": 23.15, "eval_steps_per_second": 2.896, "eval_wer": 0.9974354277340172, "step": 4700 }, { "epoch": 9.284332688588007, "eval_cer": 0.2355908348429383, "eval_loss": 0.6800413727760315, "eval_runtime": 236.222, "eval_samples_per_second": 23.11, "eval_steps_per_second": 2.891, "eval_wer": 0.9974354277340172, "step": 4800 }, { "epoch": 9.477756286266924, "eval_cer": 0.23300196220108965, "eval_loss": 0.6568382978439331, "eval_runtime": 236.9709, "eval_samples_per_second": 23.037, "eval_steps_per_second": 2.882, "eval_wer": 0.9963363253343103, "step": 4900 }, { "epoch": 9.671179883945841, "grad_norm": 5.1745381355285645, "learning_rate": 1.19904e-05, "loss": 0.6673, "step": 5000 }, { "epoch": 9.671179883945841, "eval_cer": 0.23294032237628373, "eval_loss": 0.6317699551582336, "eval_runtime": 235.798, "eval_samples_per_second": 23.151, "eval_steps_per_second": 2.897, "eval_wer": 0.9959699578677413, "step": 5000 }, { "epoch": 9.864603481624759, "eval_cer": 0.22926932836562006, "eval_loss": 0.613182783126831, "eval_runtime": 236.7731, "eval_samples_per_second": 23.056, "eval_steps_per_second": 2.885, "eval_wer": 0.9972522440007328, "step": 5100 }, { "epoch": 10.058027079303676, "eval_cer": 0.2261496683434982, "eval_loss": 0.5896427631378174, "eval_runtime": 236.3817, "eval_samples_per_second": 23.094, "eval_steps_per_second": 2.889, "eval_wer": 0.9970690602674482, "step": 5200 }, { "epoch": 10.251450676982591, "eval_cer": 0.22313274136271954, "eval_loss": 0.5742546916007996, "eval_runtime": 234.8696, "eval_samples_per_second": 23.243, "eval_steps_per_second": 2.908, "eval_wer": 0.9961531416010259, "step": 5300 }, { "epoch": 10.444874274661508, "eval_cer": 0.2214684660929597, "eval_loss": 0.5562453866004944, "eval_runtime": 234.6327, "eval_samples_per_second": 23.266, "eval_steps_per_second": 2.911, "eval_wer": 0.9959699578677413, "step": 5400 }, { "epoch": 10.638297872340425, "grad_norm": 3.1148133277893066, "learning_rate": 1.31904e-05, "loss": 0.5392, "step": 5500 }, { "epoch": 10.638297872340425, "eval_cer": 0.2236943486553957, "eval_loss": 0.5472539067268372, "eval_runtime": 233.6555, "eval_samples_per_second": 23.363, "eval_steps_per_second": 2.923, "eval_wer": 0.9972522440007328, "step": 5500 }, { "epoch": 10.831721470019342, "eval_cer": 0.21852002780640986, "eval_loss": 0.5307034850120544, "eval_runtime": 233.484, "eval_samples_per_second": 23.381, "eval_steps_per_second": 2.925, "eval_wer": 0.9963363253343103, "step": 5600 }, { "epoch": 11.02514506769826, "eval_cer": 0.21733859783096304, "eval_loss": 0.5194967985153198, "eval_runtime": 235.0242, "eval_samples_per_second": 23.227, "eval_steps_per_second": 2.906, "eval_wer": 0.9976186114673017, "step": 5700 }, { "epoch": 11.218568665377177, "eval_cer": 0.21635578506877978, "eval_loss": 0.5090118050575256, "eval_runtime": 234.9867, "eval_samples_per_second": 23.231, "eval_steps_per_second": 2.907, "eval_wer": 0.9978017952005862, "step": 5800 }, { "epoch": 11.411992263056092, "eval_cer": 0.21347583547645874, "eval_loss": 0.4978716969490051, "eval_runtime": 235.5271, "eval_samples_per_second": 23.178, "eval_steps_per_second": 2.9, "eval_wer": 0.9974354277340172, "step": 5900 }, { "epoch": 11.60541586073501, "grad_norm": 2.333935499191284, "learning_rate": 1.43904e-05, "loss": 0.4572, "step": 6000 }, { "epoch": 11.60541586073501, "eval_cer": 0.21267451775398177, "eval_loss": 0.49008145928382874, "eval_runtime": 235.4425, "eval_samples_per_second": 23.186, "eval_steps_per_second": 2.901, "eval_wer": 0.9974354277340172, "step": 6000 }, { "epoch": 11.798839458413926, "eval_cer": 0.21371212147154808, "eval_loss": 0.487173467874527, "eval_runtime": 235.8902, "eval_samples_per_second": 23.142, "eval_steps_per_second": 2.895, "eval_wer": 0.999267265066862, "step": 6100 }, { "epoch": 11.992263056092844, "eval_cer": 0.211890322205062, "eval_loss": 0.4753509759902954, "eval_runtime": 236.7331, "eval_samples_per_second": 23.06, "eval_steps_per_second": 2.885, "eval_wer": 0.9972522440007328, "step": 6200 }, { "epoch": 12.18568665377176, "eval_cer": 0.21195538646457937, "eval_loss": 0.47239425778388977, "eval_runtime": 236.344, "eval_samples_per_second": 23.098, "eval_steps_per_second": 2.89, "eval_wer": 0.9968858765341637, "step": 6300 }, { "epoch": 12.379110251450676, "eval_cer": 0.20878093548707446, "eval_loss": 0.4649977684020996, "eval_runtime": 242.8375, "eval_samples_per_second": 22.48, "eval_steps_per_second": 2.813, "eval_wer": 0.9987177138670086, "step": 6400 }, { "epoch": 12.572533849129593, "grad_norm": 3.1772515773773193, "learning_rate": 1.5590400000000002e-05, "loss": 0.41, "step": 6500 }, { "epoch": 12.572533849129593, "eval_cer": 0.20764744759758783, "eval_loss": 0.4591744542121887, "eval_runtime": 237.1668, "eval_samples_per_second": 23.018, "eval_steps_per_second": 2.88, "eval_wer": 0.9976186114673017, "step": 6500 }, { "epoch": 12.76595744680851, "eval_cer": 0.20641122666675799, "eval_loss": 0.4502531886100769, "eval_runtime": 237.1617, "eval_samples_per_second": 23.018, "eval_steps_per_second": 2.88, "eval_wer": 0.9981681626671551, "step": 6600 }, { "epoch": 12.959381044487428, "eval_cer": 0.20994524328896408, "eval_loss": 0.44779568910598755, "eval_runtime": 236.6236, "eval_samples_per_second": 23.07, "eval_steps_per_second": 2.886, "eval_wer": 0.9963363253343103, "step": 6700 }, { "epoch": 13.152804642166345, "eval_cer": 0.2061133008468627, "eval_loss": 0.4495759606361389, "eval_runtime": 237.3684, "eval_samples_per_second": 22.998, "eval_steps_per_second": 2.877, "eval_wer": 0.9981681626671551, "step": 6800 }, { "epoch": 13.346228239845262, "eval_cer": 0.20519897677890822, "eval_loss": 0.443766713142395, "eval_runtime": 251.4879, "eval_samples_per_second": 21.707, "eval_steps_per_second": 2.716, "eval_wer": 0.9981681626671551, "step": 6900 }, { "epoch": 13.539651837524177, "grad_norm": 2.66337513923645, "learning_rate": 1.6790399999999998e-05, "loss": 0.3688, "step": 7000 }, { "epoch": 13.539651837524177, "eval_cer": 0.20395933141336695, "eval_loss": 0.4365153908729553, "eval_runtime": 238.8337, "eval_samples_per_second": 22.857, "eval_steps_per_second": 2.86, "eval_wer": 0.9990840813335776, "step": 7000 }, { "epoch": 13.733075435203094, "eval_cer": 0.20464421835565494, "eval_loss": 0.4288468062877655, "eval_runtime": 234.5955, "eval_samples_per_second": 23.27, "eval_steps_per_second": 2.911, "eval_wer": 0.9979849789338707, "step": 7100 }, { "epoch": 13.926499032882012, "eval_cer": 0.20250394666100494, "eval_loss": 0.4299309551715851, "eval_runtime": 237.0206, "eval_samples_per_second": 23.032, "eval_steps_per_second": 2.882, "eval_wer": 0.9981681626671551, "step": 7200 }, { "epoch": 14.119922630560929, "eval_cer": 0.20263407518003965, "eval_loss": 0.4274175465106964, "eval_runtime": 237.1451, "eval_samples_per_second": 23.02, "eval_steps_per_second": 2.88, "eval_wer": 0.9985345301337242, "step": 7300 }, { "epoch": 14.313346228239846, "eval_cer": 0.20056229217961846, "eval_loss": 0.42421066761016846, "eval_runtime": 244.6511, "eval_samples_per_second": 22.313, "eval_steps_per_second": 2.792, "eval_wer": 0.9983513464004397, "step": 7400 }, { "epoch": 14.506769825918763, "grad_norm": 1.9870613813400269, "learning_rate": 1.79904e-05, "loss": 0.3394, "step": 7500 }, { "epoch": 14.506769825918763, "eval_cer": 0.20014108671011133, "eval_loss": 0.4253482520580292, "eval_runtime": 244.8721, "eval_samples_per_second": 22.293, "eval_steps_per_second": 2.789, "eval_wer": 0.9970690602674482, "step": 7500 }, { "epoch": 14.700193423597678, "eval_cer": 0.19957263054801228, "eval_loss": 0.4177948236465454, "eval_runtime": 234.464, "eval_samples_per_second": 23.283, "eval_steps_per_second": 2.913, "eval_wer": 0.9974354277340172, "step": 7600 }, { "epoch": 14.893617021276595, "eval_cer": 0.20044928583414093, "eval_loss": 0.41819530725479126, "eval_runtime": 240.7737, "eval_samples_per_second": 22.673, "eval_steps_per_second": 2.837, "eval_wer": 0.9983513464004397, "step": 7700 }, { "epoch": 15.087040618955513, "eval_cer": 0.1979117797129639, "eval_loss": 0.41940802335739136, "eval_runtime": 239.6423, "eval_samples_per_second": 22.78, "eval_steps_per_second": 2.85, "eval_wer": 0.9970690602674482, "step": 7800 }, { "epoch": 15.28046421663443, "eval_cer": 0.19965481698108684, "eval_loss": 0.416002482175827, "eval_runtime": 233.6247, "eval_samples_per_second": 23.367, "eval_steps_per_second": 2.923, "eval_wer": 0.9978017952005862, "step": 7900 }, { "epoch": 15.473887814313347, "grad_norm": 3.6397812366485596, "learning_rate": 1.9190400000000002e-05, "loss": 0.3157, "step": 8000 }, { "epoch": 15.473887814313347, "eval_cer": 0.20096637547556837, "eval_loss": 0.40957844257354736, "eval_runtime": 233.3898, "eval_samples_per_second": 23.39, "eval_steps_per_second": 2.926, "eval_wer": 0.9974354277340172, "step": 8000 }, { "epoch": 15.667311411992262, "eval_cer": 0.19801108831959563, "eval_loss": 0.40878182649612427, "eval_runtime": 233.2723, "eval_samples_per_second": 23.402, "eval_steps_per_second": 2.928, "eval_wer": 0.9978017952005862, "step": 8100 }, { "epoch": 15.86073500967118, "eval_cer": 0.19737414346326781, "eval_loss": 0.4118936359882355, "eval_runtime": 235.4949, "eval_samples_per_second": 23.181, "eval_steps_per_second": 2.9, "eval_wer": 0.9983513464004397, "step": 8200 }, { "epoch": 16.054158607350097, "eval_cer": 0.1965180347854078, "eval_loss": 0.40991291403770447, "eval_runtime": 243.05, "eval_samples_per_second": 22.46, "eval_steps_per_second": 2.81, "eval_wer": 0.9983513464004397, "step": 8300 }, { "epoch": 16.247582205029012, "eval_cer": 0.19766864484845165, "eval_loss": 0.4085560739040375, "eval_runtime": 232.1832, "eval_samples_per_second": 23.512, "eval_steps_per_second": 2.942, "eval_wer": 0.9985345301337242, "step": 8400 }, { "epoch": 16.44100580270793, "grad_norm": 2.502861976623535, "learning_rate": 2.03904e-05, "loss": 0.2917, "step": 8500 }, { "epoch": 16.44100580270793, "eval_cer": 0.19676116964992005, "eval_loss": 0.40965768694877625, "eval_runtime": 233.1901, "eval_samples_per_second": 23.41, "eval_steps_per_second": 2.929, "eval_wer": 0.9983513464004397, "step": 8500 }, { "epoch": 16.634429400386846, "eval_cer": 0.19489485273218524, "eval_loss": 0.41127797961235046, "eval_runtime": 235.9189, "eval_samples_per_second": 23.139, "eval_steps_per_second": 2.895, "eval_wer": 0.9979849789338707, "step": 8600 }, { "epoch": 16.827852998065765, "eval_cer": 0.19560713515216477, "eval_loss": 0.40175729990005493, "eval_runtime": 236.2968, "eval_samples_per_second": 23.102, "eval_steps_per_second": 2.89, "eval_wer": 0.9983513464004397, "step": 8700 }, { "epoch": 17.02127659574468, "eval_cer": 0.19336413041617154, "eval_loss": 0.4042558968067169, "eval_runtime": 232.2491, "eval_samples_per_second": 23.505, "eval_steps_per_second": 2.941, "eval_wer": 0.9983513464004397, "step": 8800 }, { "epoch": 17.214700193423596, "eval_cer": 0.19456953143459843, "eval_loss": 0.4046263098716736, "eval_runtime": 233.5316, "eval_samples_per_second": 23.376, "eval_steps_per_second": 2.925, "eval_wer": 0.9979849789338707, "step": 8900 }, { "epoch": 17.408123791102515, "grad_norm": 1.7895680665969849, "learning_rate": 2.15904e-05, "loss": 0.2785, "step": 9000 }, { "epoch": 17.408123791102515, "eval_cer": 0.19269636564744075, "eval_loss": 0.4045611321926117, "eval_runtime": 235.9629, "eval_samples_per_second": 23.135, "eval_steps_per_second": 2.895, "eval_wer": 0.9981681626671551, "step": 9000 }, { "epoch": 17.60154738878143, "eval_cer": 0.19476814864786196, "eval_loss": 0.40159401297569275, "eval_runtime": 235.3013, "eval_samples_per_second": 23.2, "eval_steps_per_second": 2.903, "eval_wer": 0.9989008976002931, "step": 9100 }, { "epoch": 17.79497098646035, "eval_cer": 0.19216557826716754, "eval_loss": 0.40133848786354065, "eval_runtime": 232.8231, "eval_samples_per_second": 23.447, "eval_steps_per_second": 2.934, "eval_wer": 0.9983513464004397, "step": 9200 }, { "epoch": 17.988394584139265, "eval_cer": 0.19296004712022163, "eval_loss": 0.3879222273826599, "eval_runtime": 258.2005, "eval_samples_per_second": 21.142, "eval_steps_per_second": 2.645, "eval_wer": 0.9989008976002931, "step": 9300 }, { "epoch": 18.181818181818183, "eval_cer": 0.19279224981936108, "eval_loss": 0.40087568759918213, "eval_runtime": 233.7931, "eval_samples_per_second": 23.35, "eval_steps_per_second": 2.921, "eval_wer": 0.9979849789338707, "step": 9400 }, { "epoch": 18.3752417794971, "grad_norm": 2.837678909301758, "learning_rate": 2.27904e-05, "loss": 0.2647, "step": 9500 }, { "epoch": 18.3752417794971, "eval_cer": 0.19262787695321196, "eval_loss": 0.39038729667663574, "eval_runtime": 234.0768, "eval_samples_per_second": 23.321, "eval_steps_per_second": 2.918, "eval_wer": 0.9985345301337242, "step": 9500 }, { "epoch": 18.568665377176014, "eval_cer": 0.19593930531917444, "eval_loss": 0.3944104015827179, "eval_runtime": 237.4314, "eval_samples_per_second": 22.992, "eval_steps_per_second": 2.877, "eval_wer": 0.9983513464004397, "step": 9600 }, { "epoch": 18.762088974854933, "eval_cer": 0.19592218314561724, "eval_loss": 0.39569512009620667, "eval_runtime": 246.3149, "eval_samples_per_second": 22.163, "eval_steps_per_second": 2.773, "eval_wer": 0.9989008976002931, "step": 9700 }, { "epoch": 18.95551257253385, "eval_cer": 0.1937613648426986, "eval_loss": 0.39492446184158325, "eval_runtime": 243.7942, "eval_samples_per_second": 22.392, "eval_steps_per_second": 2.802, "eval_wer": 0.9981681626671551, "step": 9800 }, { "epoch": 19.148936170212767, "eval_cer": 0.1932579729401169, "eval_loss": 0.40386101603507996, "eval_runtime": 249.0686, "eval_samples_per_second": 21.918, "eval_steps_per_second": 2.742, "eval_wer": 0.9972522440007328, "step": 9900 }, { "epoch": 19.342359767891683, "grad_norm": 3.1278374195098877, "learning_rate": 2.39904e-05, "loss": 0.248, "step": 10000 }, { "epoch": 19.342359767891683, "eval_cer": 0.19337097928559444, "eval_loss": 0.40820688009262085, "eval_runtime": 231.3525, "eval_samples_per_second": 23.596, "eval_steps_per_second": 2.952, "eval_wer": 0.9990840813335776, "step": 10000 }, { "epoch": 19.535783365570598, "eval_cer": 0.19222379365726203, "eval_loss": 0.4074006974697113, "eval_runtime": 253.2468, "eval_samples_per_second": 21.556, "eval_steps_per_second": 2.697, "eval_wer": 0.999267265066862, "step": 10100 }, { "epoch": 19.729206963249517, "eval_cer": 0.19064170482057674, "eval_loss": 0.39546000957489014, "eval_runtime": 235.3118, "eval_samples_per_second": 23.199, "eval_steps_per_second": 2.903, "eval_wer": 0.9989008976002931, "step": 10200 }, { "epoch": 19.922630560928432, "eval_cer": 0.19093620620576057, "eval_loss": 0.38558459281921387, "eval_runtime": 246.5841, "eval_samples_per_second": 22.138, "eval_steps_per_second": 2.77, "eval_wer": 0.9979849789338707, "step": 10300 }, { "epoch": 20.0, "step": 10340, "total_flos": 1.4614133359859188e+19, "train_loss": 1.9748668191050192, "train_runtime": 66473.8913, "train_samples_per_second": 4.974, "train_steps_per_second": 0.156 } ], "logging_steps": 500, "max_steps": 10340, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4614133359859188e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }