{ |
|
"best_metric": 2.176206588745117, |
|
"best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-japanese/checkpoint-10300", |
|
"epoch": 20.0, |
|
"eval_steps": 100, |
|
"global_step": 10340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19342359767891681, |
|
"eval_cer": 8.48504498730911, |
|
"eval_loss": 84.69584655761719, |
|
"eval_runtime": 236.8633, |
|
"eval_samples_per_second": 23.055, |
|
"eval_steps_per_second": 2.884, |
|
"eval_wer": 1.0115257958287596, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38684719535783363, |
|
"eval_cer": 8.344315240030694, |
|
"eval_loss": 83.78863525390625, |
|
"eval_runtime": 232.6848, |
|
"eval_samples_per_second": 23.47, |
|
"eval_steps_per_second": 2.935, |
|
"eval_wer": 1.008964507866813, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5802707930367504, |
|
"eval_cer": 4.815741763569977, |
|
"eval_loss": 81.74565124511719, |
|
"eval_runtime": 231.6247, |
|
"eval_samples_per_second": 23.577, |
|
"eval_steps_per_second": 2.949, |
|
"eval_wer": 1.000365898280278, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7736943907156673, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 75.43038940429688, |
|
"eval_runtime": 227.0361, |
|
"eval_samples_per_second": 24.053, |
|
"eval_steps_per_second": 3.008, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9671179883945842, |
|
"grad_norm": 140.793212890625, |
|
"learning_rate": 1.188e-06, |
|
"loss": 66.0277, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9671179883945842, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 63.1251106262207, |
|
"eval_runtime": 226.7903, |
|
"eval_samples_per_second": 24.08, |
|
"eval_steps_per_second": 3.012, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1605415860735009, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 57.10499572753906, |
|
"eval_runtime": 255.6303, |
|
"eval_samples_per_second": 21.363, |
|
"eval_steps_per_second": 2.672, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3539651837524178, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 55.67994689941406, |
|
"eval_runtime": 269.8181, |
|
"eval_samples_per_second": 20.24, |
|
"eval_steps_per_second": 2.531, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5473887814313345, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 55.04756546020508, |
|
"eval_runtime": 241.9741, |
|
"eval_samples_per_second": 22.569, |
|
"eval_steps_per_second": 2.823, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7408123791102514, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 54.408546447753906, |
|
"eval_runtime": 256.9455, |
|
"eval_samples_per_second": 21.254, |
|
"eval_steps_per_second": 2.658, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9342359767891684, |
|
"grad_norm": 141.13522338867188, |
|
"learning_rate": 2.3855999999999997e-06, |
|
"loss": 46.3141, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9342359767891684, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 53.68925857543945, |
|
"eval_runtime": 260.4063, |
|
"eval_samples_per_second": 20.971, |
|
"eval_steps_per_second": 2.623, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 52.97110366821289, |
|
"eval_runtime": 254.697, |
|
"eval_samples_per_second": 21.441, |
|
"eval_steps_per_second": 2.682, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.3210831721470018, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 52.13261032104492, |
|
"eval_runtime": 246.5799, |
|
"eval_samples_per_second": 22.147, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.514506769825919, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 51.254920959472656, |
|
"eval_runtime": 260.2756, |
|
"eval_samples_per_second": 20.982, |
|
"eval_steps_per_second": 2.624, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.7079303675048356, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 50.26494598388672, |
|
"eval_runtime": 261.2096, |
|
"eval_samples_per_second": 20.907, |
|
"eval_steps_per_second": 2.615, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9013539651837523, |
|
"grad_norm": 120.8663101196289, |
|
"learning_rate": 3.5856e-06, |
|
"loss": 42.8642, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.9013539651837523, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 49.20811080932617, |
|
"eval_runtime": 258.4268, |
|
"eval_samples_per_second": 21.132, |
|
"eval_steps_per_second": 2.643, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.094777562862669, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 48.105064392089844, |
|
"eval_runtime": 247.347, |
|
"eval_samples_per_second": 22.078, |
|
"eval_steps_per_second": 2.761, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.288201160541586, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 46.87884521484375, |
|
"eval_runtime": 262.3223, |
|
"eval_samples_per_second": 20.818, |
|
"eval_steps_per_second": 2.604, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.481624758220503, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 45.54108810424805, |
|
"eval_runtime": 228.7269, |
|
"eval_samples_per_second": 23.876, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.6750483558994196, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 44.151554107666016, |
|
"eval_runtime": 228.7844, |
|
"eval_samples_per_second": 23.87, |
|
"eval_steps_per_second": 2.985, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.8684719535783367, |
|
"grad_norm": 172.9249725341797, |
|
"learning_rate": 4.7856e-06, |
|
"loss": 38.3378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.8684719535783367, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 42.60865783691406, |
|
"eval_runtime": 227.6693, |
|
"eval_samples_per_second": 23.987, |
|
"eval_steps_per_second": 3.0, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.061895551257253, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 40.9814567565918, |
|
"eval_runtime": 230.7363, |
|
"eval_samples_per_second": 23.668, |
|
"eval_steps_per_second": 2.96, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 39.240055084228516, |
|
"eval_runtime": 230.4939, |
|
"eval_samples_per_second": 23.693, |
|
"eval_steps_per_second": 2.963, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.448742746615087, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 37.40217590332031, |
|
"eval_runtime": 228.8585, |
|
"eval_samples_per_second": 23.862, |
|
"eval_steps_per_second": 2.984, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.6421663442940035, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 35.430931091308594, |
|
"eval_runtime": 229.2781, |
|
"eval_samples_per_second": 23.818, |
|
"eval_steps_per_second": 2.979, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.835589941972921, |
|
"grad_norm": 113.81422424316406, |
|
"learning_rate": 5.9856e-06, |
|
"loss": 31.9192, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.835589941972921, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 33.417510986328125, |
|
"eval_runtime": 233.5128, |
|
"eval_samples_per_second": 23.386, |
|
"eval_steps_per_second": 2.925, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.029013539651838, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 31.266008377075195, |
|
"eval_runtime": 235.6664, |
|
"eval_samples_per_second": 23.173, |
|
"eval_steps_per_second": 2.898, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.222437137330754, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 29.01472282409668, |
|
"eval_runtime": 238.0062, |
|
"eval_samples_per_second": 22.945, |
|
"eval_steps_per_second": 2.87, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.415860735009671, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 26.68852996826172, |
|
"eval_runtime": 267.9224, |
|
"eval_samples_per_second": 20.383, |
|
"eval_steps_per_second": 2.549, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.609284332688588, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 24.301000595092773, |
|
"eval_runtime": 235.2693, |
|
"eval_samples_per_second": 23.212, |
|
"eval_steps_per_second": 2.903, |
|
"eval_wer": 1.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.802707930367505, |
|
"grad_norm": 106.30471801757812, |
|
"learning_rate": 7.1856e-06, |
|
"loss": 23.4284, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.802707930367505, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 21.88077735900879, |
|
"eval_runtime": 232.75, |
|
"eval_samples_per_second": 23.463, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.996131528046422, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 19.473451614379883, |
|
"eval_runtime": 233.7728, |
|
"eval_samples_per_second": 23.36, |
|
"eval_steps_per_second": 2.922, |
|
"eval_wer": 1.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.189555125725338, |
|
"eval_cer": 0.9908760508984813, |
|
"eval_loss": 17.129289627075195, |
|
"eval_runtime": 240.7161, |
|
"eval_samples_per_second": 22.686, |
|
"eval_steps_per_second": 2.837, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.9907917260454173, |
|
"eval_loss": 14.863801002502441, |
|
"eval_runtime": 233.6375, |
|
"eval_samples_per_second": 23.374, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.576402321083172, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 12.806206703186035, |
|
"eval_runtime": 251.782, |
|
"eval_samples_per_second": 21.689, |
|
"eval_steps_per_second": 2.713, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.769825918762089, |
|
"grad_norm": 82.23467254638672, |
|
"learning_rate": 8.3856e-06, |
|
"loss": 13.9431, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.769825918762089, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 10.964253425598145, |
|
"eval_runtime": 233.2904, |
|
"eval_samples_per_second": 23.409, |
|
"eval_steps_per_second": 2.928, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.963249516441006, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 9.411906242370605, |
|
"eval_runtime": 235.0547, |
|
"eval_samples_per_second": 23.233, |
|
"eval_steps_per_second": 2.906, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.156673114119923, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 8.164007186889648, |
|
"eval_runtime": 287.0729, |
|
"eval_samples_per_second": 19.023, |
|
"eval_steps_per_second": 2.379, |
|
"eval_wer": 1.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.350096711798839, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 7.22973108291626, |
|
"eval_runtime": 237.3392, |
|
"eval_samples_per_second": 23.009, |
|
"eval_steps_per_second": 2.878, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 7.543520309477756, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 6.571568489074707, |
|
"eval_runtime": 232.6033, |
|
"eval_samples_per_second": 23.478, |
|
"eval_steps_per_second": 2.936, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 7.7369439071566735, |
|
"grad_norm": 4.932778358459473, |
|
"learning_rate": 9.585600000000002e-06, |
|
"loss": 7.4585, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.7369439071566735, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 6.141250133514404, |
|
"eval_runtime": 234.2968, |
|
"eval_samples_per_second": 23.308, |
|
"eval_steps_per_second": 2.915, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.93036750483559, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 5.885389804840088, |
|
"eval_runtime": 236.1436, |
|
"eval_samples_per_second": 23.126, |
|
"eval_steps_per_second": 2.892, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.123791102514506, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 5.770660877227783, |
|
"eval_runtime": 234.5762, |
|
"eval_samples_per_second": 23.28, |
|
"eval_steps_per_second": 2.912, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.317214700193423, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 5.680200576782227, |
|
"eval_runtime": 231.827, |
|
"eval_samples_per_second": 23.556, |
|
"eval_steps_per_second": 2.946, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 5.597055435180664, |
|
"eval_runtime": 232.7784, |
|
"eval_samples_per_second": 23.46, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 1.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 8.704061895551257, |
|
"grad_norm": 1.6451424360275269, |
|
"learning_rate": 1.07856e-05, |
|
"loss": 5.7398, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.704061895551257, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 5.533324241638184, |
|
"eval_runtime": 232.5892, |
|
"eval_samples_per_second": 23.479, |
|
"eval_steps_per_second": 2.937, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.897485493230175, |
|
"eval_cer": 0.9907074011923535, |
|
"eval_loss": 5.475062370300293, |
|
"eval_runtime": 233.1057, |
|
"eval_samples_per_second": 23.427, |
|
"eval_steps_per_second": 2.93, |
|
"eval_wer": 1.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 5.425434589385986, |
|
"eval_runtime": 232.7779, |
|
"eval_samples_per_second": 23.46, |
|
"eval_steps_per_second": 2.934, |
|
"eval_wer": 1.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 9.284332688588007, |
|
"eval_cer": 0.9908423209572558, |
|
"eval_loss": 5.377471923828125, |
|
"eval_runtime": 234.4379, |
|
"eval_samples_per_second": 23.294, |
|
"eval_steps_per_second": 2.913, |
|
"eval_wer": 1.1319063300402488, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 9.477756286266924, |
|
"eval_cer": 0.9907495636188854, |
|
"eval_loss": 5.343258380889893, |
|
"eval_runtime": 239.0036, |
|
"eval_samples_per_second": 22.849, |
|
"eval_steps_per_second": 2.858, |
|
"eval_wer": 1.33205268935236, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 9.671179883945841, |
|
"grad_norm": 2.292585611343384, |
|
"learning_rate": 1.19856e-05, |
|
"loss": 5.4159, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.671179883945841, |
|
"eval_cer": 0.9906146438539831, |
|
"eval_loss": 5.311874866485596, |
|
"eval_runtime": 235.3723, |
|
"eval_samples_per_second": 23.202, |
|
"eval_steps_per_second": 2.902, |
|
"eval_wer": 1.686242224661544, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.864603481624759, |
|
"eval_cer": 0.9909688082368516, |
|
"eval_loss": 5.269064426422119, |
|
"eval_runtime": 234.5017, |
|
"eval_samples_per_second": 23.288, |
|
"eval_steps_per_second": 2.913, |
|
"eval_wer": 1.4255396999634102, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 10.058027079303676, |
|
"eval_cer": 0.9909182133250133, |
|
"eval_loss": 5.236879825592041, |
|
"eval_runtime": 235.3148, |
|
"eval_samples_per_second": 23.207, |
|
"eval_steps_per_second": 2.902, |
|
"eval_wer": 1.4043175997072814, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.251450676982591, |
|
"eval_cer": 0.9909603757515453, |
|
"eval_loss": 5.194947719573975, |
|
"eval_runtime": 232.4545, |
|
"eval_samples_per_second": 23.493, |
|
"eval_steps_per_second": 2.938, |
|
"eval_wer": 1.5686059275521405, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 10.444874274661508, |
|
"eval_cer": 0.9908170235013366, |
|
"eval_loss": 5.151918888092041, |
|
"eval_runtime": 231.5581, |
|
"eval_samples_per_second": 23.584, |
|
"eval_steps_per_second": 2.95, |
|
"eval_wer": 1.5166483717526527, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 2.0632503032684326, |
|
"learning_rate": 1.3185600000000001e-05, |
|
"loss": 5.2163, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.9909603757515453, |
|
"eval_loss": 5.108114242553711, |
|
"eval_runtime": 232.6627, |
|
"eval_samples_per_second": 23.472, |
|
"eval_steps_per_second": 2.936, |
|
"eval_wer": 1.247713135748262, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 10.831721470019342, |
|
"eval_cer": 0.9907664285894981, |
|
"eval_loss": 5.055335998535156, |
|
"eval_runtime": 231.9668, |
|
"eval_samples_per_second": 23.542, |
|
"eval_steps_per_second": 2.944, |
|
"eval_wer": 1.5124405415294548, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.02514506769826, |
|
"eval_cer": 0.9908760508984813, |
|
"eval_loss": 5.012271881103516, |
|
"eval_runtime": 231.643, |
|
"eval_samples_per_second": 23.575, |
|
"eval_steps_per_second": 2.949, |
|
"eval_wer": 1.5495792169776803, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 11.218568665377177, |
|
"eval_cer": 0.9885824148951421, |
|
"eval_loss": 4.942389965057373, |
|
"eval_runtime": 232.3058, |
|
"eval_samples_per_second": 23.508, |
|
"eval_steps_per_second": 2.94, |
|
"eval_wer": 1.7621661178192463, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 11.411992263056092, |
|
"eval_cer": 0.9830675695047602, |
|
"eval_loss": 4.8753485679626465, |
|
"eval_runtime": 233.7911, |
|
"eval_samples_per_second": 23.358, |
|
"eval_steps_per_second": 2.921, |
|
"eval_wer": 1.5404317599707282, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 11.60541586073501, |
|
"grad_norm": 1.9146788120269775, |
|
"learning_rate": 1.43856e-05, |
|
"loss": 4.9465, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 11.60541586073501, |
|
"eval_cer": 0.9749808160959279, |
|
"eval_loss": 4.77677059173584, |
|
"eval_runtime": 232.0203, |
|
"eval_samples_per_second": 23.537, |
|
"eval_steps_per_second": 2.944, |
|
"eval_wer": 1.853457738748628, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 11.798839458413926, |
|
"eval_cer": 0.9713126849876464, |
|
"eval_loss": 4.6841044425964355, |
|
"eval_runtime": 231.7273, |
|
"eval_samples_per_second": 23.566, |
|
"eval_steps_per_second": 2.947, |
|
"eval_wer": 1.8395536040980607, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 11.992263056092844, |
|
"eval_cer": 0.9697020802941251, |
|
"eval_loss": 4.582820892333984, |
|
"eval_runtime": 232.9124, |
|
"eval_samples_per_second": 23.447, |
|
"eval_steps_per_second": 2.932, |
|
"eval_wer": 1.7444200512257593, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 12.18568665377176, |
|
"eval_cer": 0.968909426675324, |
|
"eval_loss": 4.485307693481445, |
|
"eval_runtime": 232.9842, |
|
"eval_samples_per_second": 23.439, |
|
"eval_steps_per_second": 2.932, |
|
"eval_wer": 1.801317233809001, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 12.379110251450676, |
|
"eval_cer": 0.9556029648618337, |
|
"eval_loss": 4.395504951477051, |
|
"eval_runtime": 233.0172, |
|
"eval_samples_per_second": 23.436, |
|
"eval_steps_per_second": 2.931, |
|
"eval_wer": 1.827844859129162, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 12.572533849129593, |
|
"grad_norm": 4.459765434265137, |
|
"learning_rate": 1.55856e-05, |
|
"loss": 4.5094, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 12.572533849129593, |
|
"eval_cer": 0.9123021528134987, |
|
"eval_loss": 4.284241676330566, |
|
"eval_runtime": 230.5488, |
|
"eval_samples_per_second": 23.687, |
|
"eval_steps_per_second": 2.962, |
|
"eval_wer": 1.8728503476033662, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.8650380726711584, |
|
"eval_loss": 4.181938171386719, |
|
"eval_runtime": 232.0403, |
|
"eval_samples_per_second": 23.535, |
|
"eval_steps_per_second": 2.943, |
|
"eval_wer": 1.9094401756311745, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 12.959381044487428, |
|
"eval_cer": 0.848628456264915, |
|
"eval_loss": 4.074057579040527, |
|
"eval_runtime": 231.4468, |
|
"eval_samples_per_second": 23.595, |
|
"eval_steps_per_second": 2.951, |
|
"eval_wer": 1.9134650567142335, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 13.152804642166345, |
|
"eval_cer": 0.8385937987503057, |
|
"eval_loss": 3.9648523330688477, |
|
"eval_runtime": 231.3067, |
|
"eval_samples_per_second": 23.609, |
|
"eval_steps_per_second": 2.953, |
|
"eval_wer": 1.9191364800585438, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 13.346228239845262, |
|
"eval_cer": 0.8189292430157941, |
|
"eval_loss": 3.8640658855438232, |
|
"eval_runtime": 237.0711, |
|
"eval_samples_per_second": 23.035, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 1.9195023783388219, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 13.539651837524177, |
|
"grad_norm": 4.586193084716797, |
|
"learning_rate": 1.67856e-05, |
|
"loss": 4.0097, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.539651837524177, |
|
"eval_cer": 0.8014487009756386, |
|
"eval_loss": 3.7686750888824463, |
|
"eval_runtime": 244.328, |
|
"eval_samples_per_second": 22.351, |
|
"eval_steps_per_second": 2.795, |
|
"eval_wer": 1.9275521405049396, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 13.733075435203094, |
|
"eval_cer": 0.7963301823946571, |
|
"eval_loss": 3.680776596069336, |
|
"eval_runtime": 230.3109, |
|
"eval_samples_per_second": 23.711, |
|
"eval_steps_per_second": 2.966, |
|
"eval_wer": 1.9259055982436883, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 13.926499032882012, |
|
"eval_cer": 0.779220669707983, |
|
"eval_loss": 3.60208797454834, |
|
"eval_runtime": 229.5487, |
|
"eval_samples_per_second": 23.79, |
|
"eval_steps_per_second": 2.975, |
|
"eval_wer": 1.9275521405049396, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 14.119922630560929, |
|
"eval_cer": 0.777466712764253, |
|
"eval_loss": 3.55332088470459, |
|
"eval_runtime": 231.4776, |
|
"eval_samples_per_second": 23.592, |
|
"eval_steps_per_second": 2.951, |
|
"eval_wer": 1.9366995975118917, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 14.313346228239846, |
|
"eval_cer": 0.775071886937237, |
|
"eval_loss": 3.476841449737549, |
|
"eval_runtime": 240.5885, |
|
"eval_samples_per_second": 22.699, |
|
"eval_steps_per_second": 2.839, |
|
"eval_wer": 1.9321258690084158, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 14.506769825918763, |
|
"grad_norm": 4.310749053955078, |
|
"learning_rate": 1.7985600000000003e-05, |
|
"loss": 3.5619, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.506769825918763, |
|
"eval_cer": 0.7672043781463711, |
|
"eval_loss": 3.4284844398498535, |
|
"eval_runtime": 240.9512, |
|
"eval_samples_per_second": 22.664, |
|
"eval_steps_per_second": 2.835, |
|
"eval_wer": 1.938529088913282, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 14.700193423597678, |
|
"eval_cer": 0.7659816677769439, |
|
"eval_loss": 3.362793445587158, |
|
"eval_runtime": 230.1401, |
|
"eval_samples_per_second": 23.729, |
|
"eval_steps_per_second": 2.968, |
|
"eval_wer": 1.9361507500914745, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.7618919124033426, |
|
"eval_loss": 3.2909657955169678, |
|
"eval_runtime": 233.6336, |
|
"eval_samples_per_second": 23.374, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 1.9313940724478595, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 15.087040618955513, |
|
"eval_cer": 0.7486360455016907, |
|
"eval_loss": 3.2242627143859863, |
|
"eval_runtime": 241.9894, |
|
"eval_samples_per_second": 22.567, |
|
"eval_steps_per_second": 2.822, |
|
"eval_wer": 1.928832784485913, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 15.28046421663443, |
|
"eval_cer": 0.7431802275084536, |
|
"eval_loss": 3.164518117904663, |
|
"eval_runtime": 232.2322, |
|
"eval_samples_per_second": 23.515, |
|
"eval_steps_per_second": 2.941, |
|
"eval_wer": 1.9308452250274424, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 15.473887814313347, |
|
"grad_norm": 5.67767333984375, |
|
"learning_rate": 1.91856e-05, |
|
"loss": 3.2379, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.473887814313347, |
|
"eval_cer": 0.7382556560895193, |
|
"eval_loss": 3.1185944080352783, |
|
"eval_runtime": 235.8886, |
|
"eval_samples_per_second": 23.151, |
|
"eval_steps_per_second": 2.895, |
|
"eval_wer": 1.9332235638492499, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 15.667311411992262, |
|
"eval_cer": 0.7374714349560246, |
|
"eval_loss": 3.078275203704834, |
|
"eval_runtime": 234.7574, |
|
"eval_samples_per_second": 23.262, |
|
"eval_steps_per_second": 2.909, |
|
"eval_wer": 1.9348701061105014, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 15.86073500967118, |
|
"eval_cer": 0.727942726559799, |
|
"eval_loss": 3.0145950317382812, |
|
"eval_runtime": 231.4339, |
|
"eval_samples_per_second": 23.596, |
|
"eval_steps_per_second": 2.951, |
|
"eval_wer": 1.9321258690084158, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 16.054158607350097, |
|
"eval_cer": 0.7300086854598656, |
|
"eval_loss": 2.9523487091064453, |
|
"eval_runtime": 235.1315, |
|
"eval_samples_per_second": 23.225, |
|
"eval_steps_per_second": 2.905, |
|
"eval_wer": 1.9308452250274424, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 16.247582205029012, |
|
"eval_cer": 0.7254467109091063, |
|
"eval_loss": 2.918687105178833, |
|
"eval_runtime": 238.3374, |
|
"eval_samples_per_second": 22.913, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 1.9273691913648006, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 16.44100580270793, |
|
"grad_norm": 8.755329132080078, |
|
"learning_rate": 2.03856e-05, |
|
"loss": 2.9448, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.44100580270793, |
|
"eval_cer": 0.7177394193390618, |
|
"eval_loss": 2.8671371936798096, |
|
"eval_runtime": 229.0989, |
|
"eval_samples_per_second": 23.837, |
|
"eval_steps_per_second": 2.981, |
|
"eval_wer": 1.929015733626052, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 16.634429400386846, |
|
"eval_cer": 0.7115752725800876, |
|
"eval_loss": 2.8188540935516357, |
|
"eval_runtime": 230.6192, |
|
"eval_samples_per_second": 23.68, |
|
"eval_steps_per_second": 2.962, |
|
"eval_wer": 1.9348701061105014, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 16.827852998065765, |
|
"eval_cer": 0.70778065419221, |
|
"eval_loss": 2.76908802986145, |
|
"eval_runtime": 239.0894, |
|
"eval_samples_per_second": 22.841, |
|
"eval_steps_per_second": 2.857, |
|
"eval_wer": 1.9365166483717526, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"eval_cer": 0.7069205406909579, |
|
"eval_loss": 2.731661081314087, |
|
"eval_runtime": 231.1522, |
|
"eval_samples_per_second": 23.625, |
|
"eval_steps_per_second": 2.955, |
|
"eval_wer": 1.9420051225759238, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 17.214700193423596, |
|
"eval_cer": 0.7056388029243859, |
|
"eval_loss": 2.683185577392578, |
|
"eval_runtime": 229.2327, |
|
"eval_samples_per_second": 23.823, |
|
"eval_steps_per_second": 2.98, |
|
"eval_wer": 1.9489571899012075, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 17.408123791102515, |
|
"grad_norm": 4.358935832977295, |
|
"learning_rate": 2.1585600000000002e-05, |
|
"loss": 2.6749, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 17.408123791102515, |
|
"eval_cer": 0.7020128342426364, |
|
"eval_loss": 2.6420364379882812, |
|
"eval_runtime": 229.374, |
|
"eval_samples_per_second": 23.808, |
|
"eval_steps_per_second": 2.978, |
|
"eval_wer": 1.978412001463593, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 17.60154738878143, |
|
"eval_cer": 0.6991204917825431, |
|
"eval_loss": 2.601982831954956, |
|
"eval_runtime": 229.7146, |
|
"eval_samples_per_second": 23.773, |
|
"eval_steps_per_second": 2.973, |
|
"eval_wer": 1.9414562751555067, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 17.79497098646035, |
|
"eval_cer": 0.6994577911947989, |
|
"eval_loss": 2.5666821002960205, |
|
"eval_runtime": 234.5424, |
|
"eval_samples_per_second": 23.284, |
|
"eval_steps_per_second": 2.912, |
|
"eval_wer": 1.9762166117819246, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 17.988394584139265, |
|
"eval_cer": 0.6771285701034666, |
|
"eval_loss": 2.517096757888794, |
|
"eval_runtime": 247.5848, |
|
"eval_samples_per_second": 22.057, |
|
"eval_steps_per_second": 2.759, |
|
"eval_wer": 1.9857299670691548, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 18.181818181818183, |
|
"eval_cer": 0.6774658695157224, |
|
"eval_loss": 2.492238759994507, |
|
"eval_runtime": 229.5938, |
|
"eval_samples_per_second": 23.785, |
|
"eval_steps_per_second": 2.975, |
|
"eval_wer": 1.9890230515916576, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 18.3752417794971, |
|
"grad_norm": 6.681089878082275, |
|
"learning_rate": 2.27856e-05, |
|
"loss": 2.4473, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 18.3752417794971, |
|
"eval_cer": 0.6682575955611397, |
|
"eval_loss": 2.445500373840332, |
|
"eval_runtime": 229.8314, |
|
"eval_samples_per_second": 23.761, |
|
"eval_steps_per_second": 2.972, |
|
"eval_wer": 1.9882912550311014, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 18.568665377176014, |
|
"eval_cer": 0.6620681513462463, |
|
"eval_loss": 2.4191782474517822, |
|
"eval_runtime": 230.0128, |
|
"eval_samples_per_second": 23.742, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 1.9815221368459568, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 18.762088974854933, |
|
"eval_cer": 0.6523454957879736, |
|
"eval_loss": 2.386597156524658, |
|
"eval_runtime": 240.9084, |
|
"eval_samples_per_second": 22.668, |
|
"eval_steps_per_second": 2.835, |
|
"eval_wer": 1.9904866447127698, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 18.95551257253385, |
|
"eval_cer": 0.6539392355108822, |
|
"eval_loss": 2.335400342941284, |
|
"eval_runtime": 233.6249, |
|
"eval_samples_per_second": 23.375, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 1.991401390413465, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"eval_cer": 0.6515612746544789, |
|
"eval_loss": 2.3113534450531006, |
|
"eval_runtime": 252.8489, |
|
"eval_samples_per_second": 21.598, |
|
"eval_steps_per_second": 2.701, |
|
"eval_wer": 1.9924990852542992, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 19.342359767891683, |
|
"grad_norm": 8.786458015441895, |
|
"learning_rate": 2.39856e-05, |
|
"loss": 2.2307, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 19.342359767891683, |
|
"eval_cer": 0.645447722807343, |
|
"eval_loss": 2.269487142562866, |
|
"eval_runtime": 236.1588, |
|
"eval_samples_per_second": 23.124, |
|
"eval_steps_per_second": 2.892, |
|
"eval_wer": 1.9903036955726308, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 19.535783365570598, |
|
"eval_cer": 0.6464258911028847, |
|
"eval_loss": 2.246647834777832, |
|
"eval_runtime": 241.6204, |
|
"eval_samples_per_second": 22.602, |
|
"eval_steps_per_second": 2.827, |
|
"eval_wer": 1.9924990852542992, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 19.729206963249517, |
|
"eval_cer": 0.6422939733027515, |
|
"eval_loss": 2.2167210578918457, |
|
"eval_runtime": 230.6902, |
|
"eval_samples_per_second": 23.672, |
|
"eval_steps_per_second": 2.961, |
|
"eval_wer": 1.9928649835345773, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 19.922630560928432, |
|
"eval_cer": 0.641256777610065, |
|
"eval_loss": 2.176206588745117, |
|
"eval_runtime": 236.5184, |
|
"eval_samples_per_second": 23.089, |
|
"eval_steps_per_second": 2.888, |
|
"eval_wer": 1.991401390413465, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 10340, |
|
"total_flos": 1.4857862206321902e+19, |
|
"train_loss": 15.407913101757057, |
|
"train_runtime": 65838.1292, |
|
"train_samples_per_second": 5.022, |
|
"train_steps_per_second": 0.157 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4857862206321902e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |