{ "best_metric": 2.176206588745117, "best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-japanese/checkpoint-10300", "epoch": 20.0, "eval_steps": 100, "global_step": 10340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19342359767891681, "eval_cer": 8.48504498730911, "eval_loss": 84.69584655761719, "eval_runtime": 236.8633, "eval_samples_per_second": 23.055, "eval_steps_per_second": 2.884, "eval_wer": 1.0115257958287596, "step": 100 }, { "epoch": 0.38684719535783363, "eval_cer": 8.344315240030694, "eval_loss": 83.78863525390625, "eval_runtime": 232.6848, "eval_samples_per_second": 23.47, "eval_steps_per_second": 2.935, "eval_wer": 1.008964507866813, "step": 200 }, { "epoch": 0.5802707930367504, "eval_cer": 4.815741763569977, "eval_loss": 81.74565124511719, "eval_runtime": 231.6247, "eval_samples_per_second": 23.577, "eval_steps_per_second": 2.949, "eval_wer": 1.000365898280278, "step": 300 }, { "epoch": 0.7736943907156673, "eval_cer": 0.9907074011923535, "eval_loss": 75.43038940429688, "eval_runtime": 227.0361, "eval_samples_per_second": 24.053, "eval_steps_per_second": 3.008, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.9671179883945842, "grad_norm": 140.793212890625, "learning_rate": 1.188e-06, "loss": 66.0277, "step": 500 }, { "epoch": 0.9671179883945842, "eval_cer": 0.9907495636188854, "eval_loss": 63.1251106262207, "eval_runtime": 226.7903, "eval_samples_per_second": 24.08, "eval_steps_per_second": 3.012, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.1605415860735009, "eval_cer": 0.9907074011923535, "eval_loss": 57.10499572753906, "eval_runtime": 255.6303, "eval_samples_per_second": 21.363, "eval_steps_per_second": 2.672, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.3539651837524178, "eval_cer": 0.9907917260454173, "eval_loss": 55.67994689941406, "eval_runtime": 269.8181, "eval_samples_per_second": 20.24, "eval_steps_per_second": 2.531, "eval_wer": 1.0, "step": 700 }, { "epoch": 1.5473887814313345, "eval_cer": 0.9907495636188854, "eval_loss": 55.04756546020508, "eval_runtime": 241.9741, "eval_samples_per_second": 22.569, "eval_steps_per_second": 2.823, "eval_wer": 1.0, "step": 800 }, { "epoch": 1.7408123791102514, "eval_cer": 0.9907074011923535, "eval_loss": 54.408546447753906, "eval_runtime": 256.9455, "eval_samples_per_second": 21.254, "eval_steps_per_second": 2.658, "eval_wer": 1.0, "step": 900 }, { "epoch": 1.9342359767891684, "grad_norm": 141.13522338867188, "learning_rate": 2.3855999999999997e-06, "loss": 46.3141, "step": 1000 }, { "epoch": 1.9342359767891684, "eval_cer": 0.9907917260454173, "eval_loss": 53.68925857543945, "eval_runtime": 260.4063, "eval_samples_per_second": 20.971, "eval_steps_per_second": 2.623, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.127659574468085, "eval_cer": 0.9907074011923535, "eval_loss": 52.97110366821289, "eval_runtime": 254.697, "eval_samples_per_second": 21.441, "eval_steps_per_second": 2.682, "eval_wer": 1.0, "step": 1100 }, { "epoch": 2.3210831721470018, "eval_cer": 0.9907495636188854, "eval_loss": 52.13261032104492, "eval_runtime": 246.5799, "eval_samples_per_second": 22.147, "eval_steps_per_second": 2.77, "eval_wer": 1.0, "step": 1200 }, { "epoch": 2.514506769825919, "eval_cer": 0.9907074011923535, "eval_loss": 51.254920959472656, "eval_runtime": 260.2756, "eval_samples_per_second": 20.982, "eval_steps_per_second": 2.624, "eval_wer": 1.0, "step": 1300 }, { "epoch": 2.7079303675048356, "eval_cer": 0.9907074011923535, "eval_loss": 50.26494598388672, "eval_runtime": 261.2096, "eval_samples_per_second": 20.907, "eval_steps_per_second": 2.615, "eval_wer": 1.0, "step": 1400 }, { "epoch": 2.9013539651837523, "grad_norm": 120.8663101196289, "learning_rate": 3.5856e-06, "loss": 42.8642, "step": 1500 }, { "epoch": 2.9013539651837523, "eval_cer": 0.9907074011923535, "eval_loss": 49.20811080932617, "eval_runtime": 258.4268, "eval_samples_per_second": 21.132, "eval_steps_per_second": 2.643, "eval_wer": 1.0, "step": 1500 }, { "epoch": 3.094777562862669, "eval_cer": 0.9907495636188854, "eval_loss": 48.105064392089844, "eval_runtime": 247.347, "eval_samples_per_second": 22.078, "eval_steps_per_second": 2.761, "eval_wer": 1.0, "step": 1600 }, { "epoch": 3.288201160541586, "eval_cer": 0.9907074011923535, "eval_loss": 46.87884521484375, "eval_runtime": 262.3223, "eval_samples_per_second": 20.818, "eval_steps_per_second": 2.604, "eval_wer": 1.0, "step": 1700 }, { "epoch": 3.481624758220503, "eval_cer": 0.9907074011923535, "eval_loss": 45.54108810424805, "eval_runtime": 228.7269, "eval_samples_per_second": 23.876, "eval_steps_per_second": 2.986, "eval_wer": 1.0, "step": 1800 }, { "epoch": 3.6750483558994196, "eval_cer": 0.9907074011923535, "eval_loss": 44.151554107666016, "eval_runtime": 228.7844, "eval_samples_per_second": 23.87, "eval_steps_per_second": 2.985, "eval_wer": 1.0, "step": 1900 }, { "epoch": 3.8684719535783367, "grad_norm": 172.9249725341797, "learning_rate": 4.7856e-06, "loss": 38.3378, "step": 2000 }, { "epoch": 3.8684719535783367, "eval_cer": 0.9907495636188854, "eval_loss": 42.60865783691406, "eval_runtime": 227.6693, "eval_samples_per_second": 23.987, "eval_steps_per_second": 3.0, "eval_wer": 1.0, "step": 2000 }, { "epoch": 4.061895551257253, "eval_cer": 0.9907074011923535, "eval_loss": 40.9814567565918, "eval_runtime": 230.7363, "eval_samples_per_second": 23.668, "eval_steps_per_second": 2.96, "eval_wer": 1.0, "step": 2100 }, { "epoch": 4.25531914893617, "eval_cer": 0.9907074011923535, "eval_loss": 39.240055084228516, "eval_runtime": 230.4939, "eval_samples_per_second": 23.693, "eval_steps_per_second": 2.963, "eval_wer": 1.0, "step": 2200 }, { "epoch": 4.448742746615087, "eval_cer": 0.9907917260454173, "eval_loss": 37.40217590332031, "eval_runtime": 228.8585, "eval_samples_per_second": 23.862, "eval_steps_per_second": 2.984, "eval_wer": 1.0, "step": 2300 }, { "epoch": 4.6421663442940035, "eval_cer": 0.9907495636188854, "eval_loss": 35.430931091308594, "eval_runtime": 229.2781, "eval_samples_per_second": 23.818, "eval_steps_per_second": 2.979, "eval_wer": 1.0, "step": 2400 }, { "epoch": 4.835589941972921, "grad_norm": 113.81422424316406, "learning_rate": 5.9856e-06, "loss": 31.9192, "step": 2500 }, { "epoch": 4.835589941972921, "eval_cer": 0.9907074011923535, "eval_loss": 33.417510986328125, "eval_runtime": 233.5128, "eval_samples_per_second": 23.386, "eval_steps_per_second": 2.925, "eval_wer": 1.0, "step": 2500 }, { "epoch": 5.029013539651838, "eval_cer": 0.9907495636188854, "eval_loss": 31.266008377075195, "eval_runtime": 235.6664, "eval_samples_per_second": 23.173, "eval_steps_per_second": 2.898, "eval_wer": 1.0, "step": 2600 }, { "epoch": 5.222437137330754, "eval_cer": 0.9907917260454173, "eval_loss": 29.01472282409668, "eval_runtime": 238.0062, "eval_samples_per_second": 22.945, "eval_steps_per_second": 2.87, "eval_wer": 1.0, "step": 2700 }, { "epoch": 5.415860735009671, "eval_cer": 0.9907495636188854, "eval_loss": 26.68852996826172, "eval_runtime": 267.9224, "eval_samples_per_second": 20.383, "eval_steps_per_second": 2.549, "eval_wer": 1.0, "step": 2800 }, { "epoch": 5.609284332688588, "eval_cer": 0.9907495636188854, "eval_loss": 24.301000595092773, "eval_runtime": 235.2693, "eval_samples_per_second": 23.212, "eval_steps_per_second": 2.903, "eval_wer": 1.0, "step": 2900 }, { "epoch": 5.802707930367505, "grad_norm": 106.30471801757812, "learning_rate": 7.1856e-06, "loss": 23.4284, "step": 3000 }, { "epoch": 5.802707930367505, "eval_cer": 0.9907495636188854, "eval_loss": 21.88077735900879, "eval_runtime": 232.75, "eval_samples_per_second": 23.463, "eval_steps_per_second": 2.934, "eval_wer": 1.0, "step": 3000 }, { "epoch": 5.996131528046422, "eval_cer": 0.9907917260454173, "eval_loss": 19.473451614379883, "eval_runtime": 233.7728, "eval_samples_per_second": 23.36, "eval_steps_per_second": 2.922, "eval_wer": 1.0, "step": 3100 }, { "epoch": 6.189555125725338, "eval_cer": 0.9908760508984813, "eval_loss": 17.129289627075195, "eval_runtime": 240.7161, "eval_samples_per_second": 22.686, "eval_steps_per_second": 2.837, "eval_wer": 1.0, "step": 3200 }, { "epoch": 6.382978723404255, "eval_cer": 0.9907917260454173, "eval_loss": 14.863801002502441, "eval_runtime": 233.6375, "eval_samples_per_second": 23.374, "eval_steps_per_second": 2.923, "eval_wer": 1.0, "step": 3300 }, { "epoch": 6.576402321083172, "eval_cer": 0.9907074011923535, "eval_loss": 12.806206703186035, "eval_runtime": 251.782, "eval_samples_per_second": 21.689, "eval_steps_per_second": 2.713, "eval_wer": 1.0, "step": 3400 }, { "epoch": 6.769825918762089, "grad_norm": 82.23467254638672, "learning_rate": 8.3856e-06, "loss": 13.9431, "step": 3500 }, { "epoch": 6.769825918762089, "eval_cer": 0.9907074011923535, "eval_loss": 10.964253425598145, "eval_runtime": 233.2904, "eval_samples_per_second": 23.409, "eval_steps_per_second": 2.928, "eval_wer": 1.0, "step": 3500 }, { "epoch": 6.963249516441006, "eval_cer": 0.9907495636188854, "eval_loss": 9.411906242370605, "eval_runtime": 235.0547, "eval_samples_per_second": 23.233, "eval_steps_per_second": 2.906, "eval_wer": 1.0, "step": 3600 }, { "epoch": 7.156673114119923, "eval_cer": 0.9907495636188854, "eval_loss": 8.164007186889648, "eval_runtime": 287.0729, "eval_samples_per_second": 19.023, "eval_steps_per_second": 2.379, "eval_wer": 1.0, "step": 3700 }, { "epoch": 7.350096711798839, "eval_cer": 0.9907495636188854, "eval_loss": 7.22973108291626, "eval_runtime": 237.3392, "eval_samples_per_second": 23.009, "eval_steps_per_second": 2.878, "eval_wer": 1.0, "step": 3800 }, { "epoch": 7.543520309477756, "eval_cer": 0.9907074011923535, "eval_loss": 6.571568489074707, "eval_runtime": 232.6033, "eval_samples_per_second": 23.478, "eval_steps_per_second": 2.936, "eval_wer": 1.0, "step": 3900 }, { "epoch": 7.7369439071566735, "grad_norm": 4.932778358459473, "learning_rate": 9.585600000000002e-06, "loss": 7.4585, "step": 4000 }, { "epoch": 7.7369439071566735, "eval_cer": 0.9907495636188854, "eval_loss": 6.141250133514404, "eval_runtime": 234.2968, "eval_samples_per_second": 23.308, "eval_steps_per_second": 2.915, "eval_wer": 1.0, "step": 4000 }, { "epoch": 7.93036750483559, "eval_cer": 0.9907074011923535, "eval_loss": 5.885389804840088, "eval_runtime": 236.1436, "eval_samples_per_second": 23.126, "eval_steps_per_second": 2.892, "eval_wer": 1.0, "step": 4100 }, { "epoch": 8.123791102514506, "eval_cer": 0.9907495636188854, "eval_loss": 5.770660877227783, "eval_runtime": 234.5762, "eval_samples_per_second": 23.28, "eval_steps_per_second": 2.912, "eval_wer": 1.0, "step": 4200 }, { "epoch": 8.317214700193423, "eval_cer": 0.9907074011923535, "eval_loss": 5.680200576782227, "eval_runtime": 231.827, "eval_samples_per_second": 23.556, "eval_steps_per_second": 2.946, "eval_wer": 1.0, "step": 4300 }, { "epoch": 8.51063829787234, "eval_cer": 0.9907495636188854, "eval_loss": 5.597055435180664, "eval_runtime": 232.7784, "eval_samples_per_second": 23.46, "eval_steps_per_second": 2.934, "eval_wer": 1.0, "step": 4400 }, { "epoch": 8.704061895551257, "grad_norm": 1.6451424360275269, "learning_rate": 1.07856e-05, "loss": 5.7398, "step": 4500 }, { "epoch": 8.704061895551257, "eval_cer": 0.9907495636188854, "eval_loss": 5.533324241638184, "eval_runtime": 232.5892, "eval_samples_per_second": 23.479, "eval_steps_per_second": 2.937, "eval_wer": 1.0, "step": 4500 }, { "epoch": 8.897485493230175, "eval_cer": 0.9907074011923535, "eval_loss": 5.475062370300293, "eval_runtime": 233.1057, "eval_samples_per_second": 23.427, "eval_steps_per_second": 2.93, "eval_wer": 1.0, "step": 4600 }, { "epoch": 9.090909090909092, "eval_cer": 0.9907495636188854, "eval_loss": 5.425434589385986, "eval_runtime": 232.7779, "eval_samples_per_second": 23.46, "eval_steps_per_second": 2.934, "eval_wer": 1.0, "step": 4700 }, { "epoch": 9.284332688588007, "eval_cer": 0.9908423209572558, "eval_loss": 5.377471923828125, "eval_runtime": 234.4379, "eval_samples_per_second": 23.294, "eval_steps_per_second": 2.913, "eval_wer": 1.1319063300402488, "step": 4800 }, { "epoch": 9.477756286266924, "eval_cer": 0.9907495636188854, "eval_loss": 5.343258380889893, "eval_runtime": 239.0036, "eval_samples_per_second": 22.849, "eval_steps_per_second": 2.858, "eval_wer": 1.33205268935236, "step": 4900 }, { "epoch": 9.671179883945841, "grad_norm": 2.292585611343384, "learning_rate": 1.19856e-05, "loss": 5.4159, "step": 5000 }, { "epoch": 9.671179883945841, "eval_cer": 0.9906146438539831, "eval_loss": 5.311874866485596, "eval_runtime": 235.3723, "eval_samples_per_second": 23.202, "eval_steps_per_second": 2.902, "eval_wer": 1.686242224661544, "step": 5000 }, { "epoch": 9.864603481624759, "eval_cer": 0.9909688082368516, "eval_loss": 5.269064426422119, "eval_runtime": 234.5017, "eval_samples_per_second": 23.288, "eval_steps_per_second": 2.913, "eval_wer": 1.4255396999634102, "step": 5100 }, { "epoch": 10.058027079303676, "eval_cer": 0.9909182133250133, "eval_loss": 5.236879825592041, "eval_runtime": 235.3148, "eval_samples_per_second": 23.207, "eval_steps_per_second": 2.902, "eval_wer": 1.4043175997072814, "step": 5200 }, { "epoch": 10.251450676982591, "eval_cer": 0.9909603757515453, "eval_loss": 5.194947719573975, "eval_runtime": 232.4545, "eval_samples_per_second": 23.493, "eval_steps_per_second": 2.938, "eval_wer": 1.5686059275521405, "step": 5300 }, { "epoch": 10.444874274661508, "eval_cer": 0.9908170235013366, "eval_loss": 5.151918888092041, "eval_runtime": 231.5581, "eval_samples_per_second": 23.584, "eval_steps_per_second": 2.95, "eval_wer": 1.5166483717526527, "step": 5400 }, { "epoch": 10.638297872340425, "grad_norm": 2.0632503032684326, "learning_rate": 1.3185600000000001e-05, "loss": 5.2163, "step": 5500 }, { "epoch": 10.638297872340425, "eval_cer": 0.9909603757515453, "eval_loss": 5.108114242553711, "eval_runtime": 232.6627, "eval_samples_per_second": 23.472, "eval_steps_per_second": 2.936, "eval_wer": 1.247713135748262, "step": 5500 }, { "epoch": 10.831721470019342, "eval_cer": 0.9907664285894981, "eval_loss": 5.055335998535156, "eval_runtime": 231.9668, "eval_samples_per_second": 23.542, "eval_steps_per_second": 2.944, "eval_wer": 1.5124405415294548, "step": 5600 }, { "epoch": 11.02514506769826, "eval_cer": 0.9908760508984813, "eval_loss": 5.012271881103516, "eval_runtime": 231.643, "eval_samples_per_second": 23.575, "eval_steps_per_second": 2.949, "eval_wer": 1.5495792169776803, "step": 5700 }, { "epoch": 11.218568665377177, "eval_cer": 0.9885824148951421, "eval_loss": 4.942389965057373, "eval_runtime": 232.3058, "eval_samples_per_second": 23.508, "eval_steps_per_second": 2.94, "eval_wer": 1.7621661178192463, "step": 5800 }, { "epoch": 11.411992263056092, "eval_cer": 0.9830675695047602, "eval_loss": 4.8753485679626465, "eval_runtime": 233.7911, "eval_samples_per_second": 23.358, "eval_steps_per_second": 2.921, "eval_wer": 1.5404317599707282, "step": 5900 }, { "epoch": 11.60541586073501, "grad_norm": 1.9146788120269775, "learning_rate": 1.43856e-05, "loss": 4.9465, "step": 6000 }, { "epoch": 11.60541586073501, "eval_cer": 0.9749808160959279, "eval_loss": 4.77677059173584, "eval_runtime": 232.0203, "eval_samples_per_second": 23.537, "eval_steps_per_second": 2.944, "eval_wer": 1.853457738748628, "step": 6000 }, { "epoch": 11.798839458413926, "eval_cer": 0.9713126849876464, "eval_loss": 4.6841044425964355, "eval_runtime": 231.7273, "eval_samples_per_second": 23.566, "eval_steps_per_second": 2.947, "eval_wer": 1.8395536040980607, "step": 6100 }, { "epoch": 11.992263056092844, "eval_cer": 0.9697020802941251, "eval_loss": 4.582820892333984, "eval_runtime": 232.9124, "eval_samples_per_second": 23.447, "eval_steps_per_second": 2.932, "eval_wer": 1.7444200512257593, "step": 6200 }, { "epoch": 12.18568665377176, "eval_cer": 0.968909426675324, "eval_loss": 4.485307693481445, "eval_runtime": 232.9842, "eval_samples_per_second": 23.439, "eval_steps_per_second": 2.932, "eval_wer": 1.801317233809001, "step": 6300 }, { "epoch": 12.379110251450676, "eval_cer": 0.9556029648618337, "eval_loss": 4.395504951477051, "eval_runtime": 233.0172, "eval_samples_per_second": 23.436, "eval_steps_per_second": 2.931, "eval_wer": 1.827844859129162, "step": 6400 }, { "epoch": 12.572533849129593, "grad_norm": 4.459765434265137, "learning_rate": 1.55856e-05, "loss": 4.5094, "step": 6500 }, { "epoch": 12.572533849129593, "eval_cer": 0.9123021528134987, "eval_loss": 4.284241676330566, "eval_runtime": 230.5488, "eval_samples_per_second": 23.687, "eval_steps_per_second": 2.962, "eval_wer": 1.8728503476033662, "step": 6500 }, { "epoch": 12.76595744680851, "eval_cer": 0.8650380726711584, "eval_loss": 4.181938171386719, "eval_runtime": 232.0403, "eval_samples_per_second": 23.535, "eval_steps_per_second": 2.943, "eval_wer": 1.9094401756311745, "step": 6600 }, { "epoch": 12.959381044487428, "eval_cer": 0.848628456264915, "eval_loss": 4.074057579040527, "eval_runtime": 231.4468, "eval_samples_per_second": 23.595, "eval_steps_per_second": 2.951, "eval_wer": 1.9134650567142335, "step": 6700 }, { "epoch": 13.152804642166345, "eval_cer": 0.8385937987503057, "eval_loss": 3.9648523330688477, "eval_runtime": 231.3067, "eval_samples_per_second": 23.609, "eval_steps_per_second": 2.953, "eval_wer": 1.9191364800585438, "step": 6800 }, { "epoch": 13.346228239845262, "eval_cer": 0.8189292430157941, "eval_loss": 3.8640658855438232, "eval_runtime": 237.0711, "eval_samples_per_second": 23.035, "eval_steps_per_second": 2.881, "eval_wer": 1.9195023783388219, "step": 6900 }, { "epoch": 13.539651837524177, "grad_norm": 4.586193084716797, "learning_rate": 1.67856e-05, "loss": 4.0097, "step": 7000 }, { "epoch": 13.539651837524177, "eval_cer": 0.8014487009756386, "eval_loss": 3.7686750888824463, "eval_runtime": 244.328, "eval_samples_per_second": 22.351, "eval_steps_per_second": 2.795, "eval_wer": 1.9275521405049396, "step": 7000 }, { "epoch": 13.733075435203094, "eval_cer": 0.7963301823946571, "eval_loss": 3.680776596069336, "eval_runtime": 230.3109, "eval_samples_per_second": 23.711, "eval_steps_per_second": 2.966, "eval_wer": 1.9259055982436883, "step": 7100 }, { "epoch": 13.926499032882012, "eval_cer": 0.779220669707983, "eval_loss": 3.60208797454834, "eval_runtime": 229.5487, "eval_samples_per_second": 23.79, "eval_steps_per_second": 2.975, "eval_wer": 1.9275521405049396, "step": 7200 }, { "epoch": 14.119922630560929, "eval_cer": 0.777466712764253, "eval_loss": 3.55332088470459, "eval_runtime": 231.4776, "eval_samples_per_second": 23.592, "eval_steps_per_second": 2.951, "eval_wer": 1.9366995975118917, "step": 7300 }, { "epoch": 14.313346228239846, "eval_cer": 0.775071886937237, "eval_loss": 3.476841449737549, "eval_runtime": 240.5885, "eval_samples_per_second": 22.699, "eval_steps_per_second": 2.839, "eval_wer": 1.9321258690084158, "step": 7400 }, { "epoch": 14.506769825918763, "grad_norm": 4.310749053955078, "learning_rate": 1.7985600000000003e-05, "loss": 3.5619, "step": 7500 }, { "epoch": 14.506769825918763, "eval_cer": 0.7672043781463711, "eval_loss": 3.4284844398498535, "eval_runtime": 240.9512, "eval_samples_per_second": 22.664, "eval_steps_per_second": 2.835, "eval_wer": 1.938529088913282, "step": 7500 }, { "epoch": 14.700193423597678, "eval_cer": 0.7659816677769439, "eval_loss": 3.362793445587158, "eval_runtime": 230.1401, "eval_samples_per_second": 23.729, "eval_steps_per_second": 2.968, "eval_wer": 1.9361507500914745, "step": 7600 }, { "epoch": 14.893617021276595, "eval_cer": 0.7618919124033426, "eval_loss": 3.2909657955169678, "eval_runtime": 233.6336, "eval_samples_per_second": 23.374, "eval_steps_per_second": 2.923, "eval_wer": 1.9313940724478595, "step": 7700 }, { "epoch": 15.087040618955513, "eval_cer": 0.7486360455016907, "eval_loss": 3.2242627143859863, "eval_runtime": 241.9894, "eval_samples_per_second": 22.567, "eval_steps_per_second": 2.822, "eval_wer": 1.928832784485913, "step": 7800 }, { "epoch": 15.28046421663443, "eval_cer": 0.7431802275084536, "eval_loss": 3.164518117904663, "eval_runtime": 232.2322, "eval_samples_per_second": 23.515, "eval_steps_per_second": 2.941, "eval_wer": 1.9308452250274424, "step": 7900 }, { "epoch": 15.473887814313347, "grad_norm": 5.67767333984375, "learning_rate": 1.91856e-05, "loss": 3.2379, "step": 8000 }, { "epoch": 15.473887814313347, "eval_cer": 0.7382556560895193, "eval_loss": 3.1185944080352783, "eval_runtime": 235.8886, "eval_samples_per_second": 23.151, "eval_steps_per_second": 2.895, "eval_wer": 1.9332235638492499, "step": 8000 }, { "epoch": 15.667311411992262, "eval_cer": 0.7374714349560246, "eval_loss": 3.078275203704834, "eval_runtime": 234.7574, "eval_samples_per_second": 23.262, "eval_steps_per_second": 2.909, "eval_wer": 1.9348701061105014, "step": 8100 }, { "epoch": 15.86073500967118, "eval_cer": 0.727942726559799, "eval_loss": 3.0145950317382812, "eval_runtime": 231.4339, "eval_samples_per_second": 23.596, "eval_steps_per_second": 2.951, "eval_wer": 1.9321258690084158, "step": 8200 }, { "epoch": 16.054158607350097, "eval_cer": 0.7300086854598656, "eval_loss": 2.9523487091064453, "eval_runtime": 235.1315, "eval_samples_per_second": 23.225, "eval_steps_per_second": 2.905, "eval_wer": 1.9308452250274424, "step": 8300 }, { "epoch": 16.247582205029012, "eval_cer": 0.7254467109091063, "eval_loss": 2.918687105178833, "eval_runtime": 238.3374, "eval_samples_per_second": 22.913, "eval_steps_per_second": 2.866, "eval_wer": 1.9273691913648006, "step": 8400 }, { "epoch": 16.44100580270793, "grad_norm": 8.755329132080078, "learning_rate": 2.03856e-05, "loss": 2.9448, "step": 8500 }, { "epoch": 16.44100580270793, "eval_cer": 0.7177394193390618, "eval_loss": 2.8671371936798096, "eval_runtime": 229.0989, "eval_samples_per_second": 23.837, "eval_steps_per_second": 2.981, "eval_wer": 1.929015733626052, "step": 8500 }, { "epoch": 16.634429400386846, "eval_cer": 0.7115752725800876, "eval_loss": 2.8188540935516357, "eval_runtime": 230.6192, "eval_samples_per_second": 23.68, "eval_steps_per_second": 2.962, "eval_wer": 1.9348701061105014, "step": 8600 }, { "epoch": 16.827852998065765, "eval_cer": 0.70778065419221, "eval_loss": 2.76908802986145, "eval_runtime": 239.0894, "eval_samples_per_second": 22.841, "eval_steps_per_second": 2.857, "eval_wer": 1.9365166483717526, "step": 8700 }, { "epoch": 17.02127659574468, "eval_cer": 0.7069205406909579, "eval_loss": 2.731661081314087, "eval_runtime": 231.1522, "eval_samples_per_second": 23.625, "eval_steps_per_second": 2.955, "eval_wer": 1.9420051225759238, "step": 8800 }, { "epoch": 17.214700193423596, "eval_cer": 0.7056388029243859, "eval_loss": 2.683185577392578, "eval_runtime": 229.2327, "eval_samples_per_second": 23.823, "eval_steps_per_second": 2.98, "eval_wer": 1.9489571899012075, "step": 8900 }, { "epoch": 17.408123791102515, "grad_norm": 4.358935832977295, "learning_rate": 2.1585600000000002e-05, "loss": 2.6749, "step": 9000 }, { "epoch": 17.408123791102515, "eval_cer": 0.7020128342426364, "eval_loss": 2.6420364379882812, "eval_runtime": 229.374, "eval_samples_per_second": 23.808, "eval_steps_per_second": 2.978, "eval_wer": 1.978412001463593, "step": 9000 }, { "epoch": 17.60154738878143, "eval_cer": 0.6991204917825431, "eval_loss": 2.601982831954956, "eval_runtime": 229.7146, "eval_samples_per_second": 23.773, "eval_steps_per_second": 2.973, "eval_wer": 1.9414562751555067, "step": 9100 }, { "epoch": 17.79497098646035, "eval_cer": 0.6994577911947989, "eval_loss": 2.5666821002960205, "eval_runtime": 234.5424, "eval_samples_per_second": 23.284, "eval_steps_per_second": 2.912, "eval_wer": 1.9762166117819246, "step": 9200 }, { "epoch": 17.988394584139265, "eval_cer": 0.6771285701034666, "eval_loss": 2.517096757888794, "eval_runtime": 247.5848, "eval_samples_per_second": 22.057, "eval_steps_per_second": 2.759, "eval_wer": 1.9857299670691548, "step": 9300 }, { "epoch": 18.181818181818183, "eval_cer": 0.6774658695157224, "eval_loss": 2.492238759994507, "eval_runtime": 229.5938, "eval_samples_per_second": 23.785, "eval_steps_per_second": 2.975, "eval_wer": 1.9890230515916576, "step": 9400 }, { "epoch": 18.3752417794971, "grad_norm": 6.681089878082275, "learning_rate": 2.27856e-05, "loss": 2.4473, "step": 9500 }, { "epoch": 18.3752417794971, "eval_cer": 0.6682575955611397, "eval_loss": 2.445500373840332, "eval_runtime": 229.8314, "eval_samples_per_second": 23.761, "eval_steps_per_second": 2.972, "eval_wer": 1.9882912550311014, "step": 9500 }, { "epoch": 18.568665377176014, "eval_cer": 0.6620681513462463, "eval_loss": 2.4191782474517822, "eval_runtime": 230.0128, "eval_samples_per_second": 23.742, "eval_steps_per_second": 2.969, "eval_wer": 1.9815221368459568, "step": 9600 }, { "epoch": 18.762088974854933, "eval_cer": 0.6523454957879736, "eval_loss": 2.386597156524658, "eval_runtime": 240.9084, "eval_samples_per_second": 22.668, "eval_steps_per_second": 2.835, "eval_wer": 1.9904866447127698, "step": 9700 }, { "epoch": 18.95551257253385, "eval_cer": 0.6539392355108822, "eval_loss": 2.335400342941284, "eval_runtime": 233.6249, "eval_samples_per_second": 23.375, "eval_steps_per_second": 2.923, "eval_wer": 1.991401390413465, "step": 9800 }, { "epoch": 19.148936170212767, "eval_cer": 0.6515612746544789, "eval_loss": 2.3113534450531006, "eval_runtime": 252.8489, "eval_samples_per_second": 21.598, "eval_steps_per_second": 2.701, "eval_wer": 1.9924990852542992, "step": 9900 }, { "epoch": 19.342359767891683, "grad_norm": 8.786458015441895, "learning_rate": 2.39856e-05, "loss": 2.2307, "step": 10000 }, { "epoch": 19.342359767891683, "eval_cer": 0.645447722807343, "eval_loss": 2.269487142562866, "eval_runtime": 236.1588, "eval_samples_per_second": 23.124, "eval_steps_per_second": 2.892, "eval_wer": 1.9903036955726308, "step": 10000 }, { "epoch": 19.535783365570598, "eval_cer": 0.6464258911028847, "eval_loss": 2.246647834777832, "eval_runtime": 241.6204, "eval_samples_per_second": 22.602, "eval_steps_per_second": 2.827, "eval_wer": 1.9924990852542992, "step": 10100 }, { "epoch": 19.729206963249517, "eval_cer": 0.6422939733027515, "eval_loss": 2.2167210578918457, "eval_runtime": 230.6902, "eval_samples_per_second": 23.672, "eval_steps_per_second": 2.961, "eval_wer": 1.9928649835345773, "step": 10200 }, { "epoch": 19.922630560928432, "eval_cer": 0.641256777610065, "eval_loss": 2.176206588745117, "eval_runtime": 236.5184, "eval_samples_per_second": 23.089, "eval_steps_per_second": 2.888, "eval_wer": 1.991401390413465, "step": 10300 }, { "epoch": 20.0, "step": 10340, "total_flos": 1.4857862206321902e+19, "train_loss": 15.407913101757057, "train_runtime": 65838.1292, "train_samples_per_second": 5.022, "train_steps_per_second": 0.157 } ], "logging_steps": 500, "max_steps": 10340, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4857862206321902e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }