utakumi's picture
End of training
8565ad4 verified
{
"best_metric": 2.176206588745117,
"best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-japanese/checkpoint-10300",
"epoch": 20.0,
"eval_steps": 100,
"global_step": 10340,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19342359767891681,
"eval_cer": 8.48504498730911,
"eval_loss": 84.69584655761719,
"eval_runtime": 236.8633,
"eval_samples_per_second": 23.055,
"eval_steps_per_second": 2.884,
"eval_wer": 1.0115257958287596,
"step": 100
},
{
"epoch": 0.38684719535783363,
"eval_cer": 8.344315240030694,
"eval_loss": 83.78863525390625,
"eval_runtime": 232.6848,
"eval_samples_per_second": 23.47,
"eval_steps_per_second": 2.935,
"eval_wer": 1.008964507866813,
"step": 200
},
{
"epoch": 0.5802707930367504,
"eval_cer": 4.815741763569977,
"eval_loss": 81.74565124511719,
"eval_runtime": 231.6247,
"eval_samples_per_second": 23.577,
"eval_steps_per_second": 2.949,
"eval_wer": 1.000365898280278,
"step": 300
},
{
"epoch": 0.7736943907156673,
"eval_cer": 0.9907074011923535,
"eval_loss": 75.43038940429688,
"eval_runtime": 227.0361,
"eval_samples_per_second": 24.053,
"eval_steps_per_second": 3.008,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 0.9671179883945842,
"grad_norm": 140.793212890625,
"learning_rate": 1.188e-06,
"loss": 66.0277,
"step": 500
},
{
"epoch": 0.9671179883945842,
"eval_cer": 0.9907495636188854,
"eval_loss": 63.1251106262207,
"eval_runtime": 226.7903,
"eval_samples_per_second": 24.08,
"eval_steps_per_second": 3.012,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.1605415860735009,
"eval_cer": 0.9907074011923535,
"eval_loss": 57.10499572753906,
"eval_runtime": 255.6303,
"eval_samples_per_second": 21.363,
"eval_steps_per_second": 2.672,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 1.3539651837524178,
"eval_cer": 0.9907917260454173,
"eval_loss": 55.67994689941406,
"eval_runtime": 269.8181,
"eval_samples_per_second": 20.24,
"eval_steps_per_second": 2.531,
"eval_wer": 1.0,
"step": 700
},
{
"epoch": 1.5473887814313345,
"eval_cer": 0.9907495636188854,
"eval_loss": 55.04756546020508,
"eval_runtime": 241.9741,
"eval_samples_per_second": 22.569,
"eval_steps_per_second": 2.823,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 1.7408123791102514,
"eval_cer": 0.9907074011923535,
"eval_loss": 54.408546447753906,
"eval_runtime": 256.9455,
"eval_samples_per_second": 21.254,
"eval_steps_per_second": 2.658,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 1.9342359767891684,
"grad_norm": 141.13522338867188,
"learning_rate": 2.3855999999999997e-06,
"loss": 46.3141,
"step": 1000
},
{
"epoch": 1.9342359767891684,
"eval_cer": 0.9907917260454173,
"eval_loss": 53.68925857543945,
"eval_runtime": 260.4063,
"eval_samples_per_second": 20.971,
"eval_steps_per_second": 2.623,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 2.127659574468085,
"eval_cer": 0.9907074011923535,
"eval_loss": 52.97110366821289,
"eval_runtime": 254.697,
"eval_samples_per_second": 21.441,
"eval_steps_per_second": 2.682,
"eval_wer": 1.0,
"step": 1100
},
{
"epoch": 2.3210831721470018,
"eval_cer": 0.9907495636188854,
"eval_loss": 52.13261032104492,
"eval_runtime": 246.5799,
"eval_samples_per_second": 22.147,
"eval_steps_per_second": 2.77,
"eval_wer": 1.0,
"step": 1200
},
{
"epoch": 2.514506769825919,
"eval_cer": 0.9907074011923535,
"eval_loss": 51.254920959472656,
"eval_runtime": 260.2756,
"eval_samples_per_second": 20.982,
"eval_steps_per_second": 2.624,
"eval_wer": 1.0,
"step": 1300
},
{
"epoch": 2.7079303675048356,
"eval_cer": 0.9907074011923535,
"eval_loss": 50.26494598388672,
"eval_runtime": 261.2096,
"eval_samples_per_second": 20.907,
"eval_steps_per_second": 2.615,
"eval_wer": 1.0,
"step": 1400
},
{
"epoch": 2.9013539651837523,
"grad_norm": 120.8663101196289,
"learning_rate": 3.5856e-06,
"loss": 42.8642,
"step": 1500
},
{
"epoch": 2.9013539651837523,
"eval_cer": 0.9907074011923535,
"eval_loss": 49.20811080932617,
"eval_runtime": 258.4268,
"eval_samples_per_second": 21.132,
"eval_steps_per_second": 2.643,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 3.094777562862669,
"eval_cer": 0.9907495636188854,
"eval_loss": 48.105064392089844,
"eval_runtime": 247.347,
"eval_samples_per_second": 22.078,
"eval_steps_per_second": 2.761,
"eval_wer": 1.0,
"step": 1600
},
{
"epoch": 3.288201160541586,
"eval_cer": 0.9907074011923535,
"eval_loss": 46.87884521484375,
"eval_runtime": 262.3223,
"eval_samples_per_second": 20.818,
"eval_steps_per_second": 2.604,
"eval_wer": 1.0,
"step": 1700
},
{
"epoch": 3.481624758220503,
"eval_cer": 0.9907074011923535,
"eval_loss": 45.54108810424805,
"eval_runtime": 228.7269,
"eval_samples_per_second": 23.876,
"eval_steps_per_second": 2.986,
"eval_wer": 1.0,
"step": 1800
},
{
"epoch": 3.6750483558994196,
"eval_cer": 0.9907074011923535,
"eval_loss": 44.151554107666016,
"eval_runtime": 228.7844,
"eval_samples_per_second": 23.87,
"eval_steps_per_second": 2.985,
"eval_wer": 1.0,
"step": 1900
},
{
"epoch": 3.8684719535783367,
"grad_norm": 172.9249725341797,
"learning_rate": 4.7856e-06,
"loss": 38.3378,
"step": 2000
},
{
"epoch": 3.8684719535783367,
"eval_cer": 0.9907495636188854,
"eval_loss": 42.60865783691406,
"eval_runtime": 227.6693,
"eval_samples_per_second": 23.987,
"eval_steps_per_second": 3.0,
"eval_wer": 1.0,
"step": 2000
},
{
"epoch": 4.061895551257253,
"eval_cer": 0.9907074011923535,
"eval_loss": 40.9814567565918,
"eval_runtime": 230.7363,
"eval_samples_per_second": 23.668,
"eval_steps_per_second": 2.96,
"eval_wer": 1.0,
"step": 2100
},
{
"epoch": 4.25531914893617,
"eval_cer": 0.9907074011923535,
"eval_loss": 39.240055084228516,
"eval_runtime": 230.4939,
"eval_samples_per_second": 23.693,
"eval_steps_per_second": 2.963,
"eval_wer": 1.0,
"step": 2200
},
{
"epoch": 4.448742746615087,
"eval_cer": 0.9907917260454173,
"eval_loss": 37.40217590332031,
"eval_runtime": 228.8585,
"eval_samples_per_second": 23.862,
"eval_steps_per_second": 2.984,
"eval_wer": 1.0,
"step": 2300
},
{
"epoch": 4.6421663442940035,
"eval_cer": 0.9907495636188854,
"eval_loss": 35.430931091308594,
"eval_runtime": 229.2781,
"eval_samples_per_second": 23.818,
"eval_steps_per_second": 2.979,
"eval_wer": 1.0,
"step": 2400
},
{
"epoch": 4.835589941972921,
"grad_norm": 113.81422424316406,
"learning_rate": 5.9856e-06,
"loss": 31.9192,
"step": 2500
},
{
"epoch": 4.835589941972921,
"eval_cer": 0.9907074011923535,
"eval_loss": 33.417510986328125,
"eval_runtime": 233.5128,
"eval_samples_per_second": 23.386,
"eval_steps_per_second": 2.925,
"eval_wer": 1.0,
"step": 2500
},
{
"epoch": 5.029013539651838,
"eval_cer": 0.9907495636188854,
"eval_loss": 31.266008377075195,
"eval_runtime": 235.6664,
"eval_samples_per_second": 23.173,
"eval_steps_per_second": 2.898,
"eval_wer": 1.0,
"step": 2600
},
{
"epoch": 5.222437137330754,
"eval_cer": 0.9907917260454173,
"eval_loss": 29.01472282409668,
"eval_runtime": 238.0062,
"eval_samples_per_second": 22.945,
"eval_steps_per_second": 2.87,
"eval_wer": 1.0,
"step": 2700
},
{
"epoch": 5.415860735009671,
"eval_cer": 0.9907495636188854,
"eval_loss": 26.68852996826172,
"eval_runtime": 267.9224,
"eval_samples_per_second": 20.383,
"eval_steps_per_second": 2.549,
"eval_wer": 1.0,
"step": 2800
},
{
"epoch": 5.609284332688588,
"eval_cer": 0.9907495636188854,
"eval_loss": 24.301000595092773,
"eval_runtime": 235.2693,
"eval_samples_per_second": 23.212,
"eval_steps_per_second": 2.903,
"eval_wer": 1.0,
"step": 2900
},
{
"epoch": 5.802707930367505,
"grad_norm": 106.30471801757812,
"learning_rate": 7.1856e-06,
"loss": 23.4284,
"step": 3000
},
{
"epoch": 5.802707930367505,
"eval_cer": 0.9907495636188854,
"eval_loss": 21.88077735900879,
"eval_runtime": 232.75,
"eval_samples_per_second": 23.463,
"eval_steps_per_second": 2.934,
"eval_wer": 1.0,
"step": 3000
},
{
"epoch": 5.996131528046422,
"eval_cer": 0.9907917260454173,
"eval_loss": 19.473451614379883,
"eval_runtime": 233.7728,
"eval_samples_per_second": 23.36,
"eval_steps_per_second": 2.922,
"eval_wer": 1.0,
"step": 3100
},
{
"epoch": 6.189555125725338,
"eval_cer": 0.9908760508984813,
"eval_loss": 17.129289627075195,
"eval_runtime": 240.7161,
"eval_samples_per_second": 22.686,
"eval_steps_per_second": 2.837,
"eval_wer": 1.0,
"step": 3200
},
{
"epoch": 6.382978723404255,
"eval_cer": 0.9907917260454173,
"eval_loss": 14.863801002502441,
"eval_runtime": 233.6375,
"eval_samples_per_second": 23.374,
"eval_steps_per_second": 2.923,
"eval_wer": 1.0,
"step": 3300
},
{
"epoch": 6.576402321083172,
"eval_cer": 0.9907074011923535,
"eval_loss": 12.806206703186035,
"eval_runtime": 251.782,
"eval_samples_per_second": 21.689,
"eval_steps_per_second": 2.713,
"eval_wer": 1.0,
"step": 3400
},
{
"epoch": 6.769825918762089,
"grad_norm": 82.23467254638672,
"learning_rate": 8.3856e-06,
"loss": 13.9431,
"step": 3500
},
{
"epoch": 6.769825918762089,
"eval_cer": 0.9907074011923535,
"eval_loss": 10.964253425598145,
"eval_runtime": 233.2904,
"eval_samples_per_second": 23.409,
"eval_steps_per_second": 2.928,
"eval_wer": 1.0,
"step": 3500
},
{
"epoch": 6.963249516441006,
"eval_cer": 0.9907495636188854,
"eval_loss": 9.411906242370605,
"eval_runtime": 235.0547,
"eval_samples_per_second": 23.233,
"eval_steps_per_second": 2.906,
"eval_wer": 1.0,
"step": 3600
},
{
"epoch": 7.156673114119923,
"eval_cer": 0.9907495636188854,
"eval_loss": 8.164007186889648,
"eval_runtime": 287.0729,
"eval_samples_per_second": 19.023,
"eval_steps_per_second": 2.379,
"eval_wer": 1.0,
"step": 3700
},
{
"epoch": 7.350096711798839,
"eval_cer": 0.9907495636188854,
"eval_loss": 7.22973108291626,
"eval_runtime": 237.3392,
"eval_samples_per_second": 23.009,
"eval_steps_per_second": 2.878,
"eval_wer": 1.0,
"step": 3800
},
{
"epoch": 7.543520309477756,
"eval_cer": 0.9907074011923535,
"eval_loss": 6.571568489074707,
"eval_runtime": 232.6033,
"eval_samples_per_second": 23.478,
"eval_steps_per_second": 2.936,
"eval_wer": 1.0,
"step": 3900
},
{
"epoch": 7.7369439071566735,
"grad_norm": 4.932778358459473,
"learning_rate": 9.585600000000002e-06,
"loss": 7.4585,
"step": 4000
},
{
"epoch": 7.7369439071566735,
"eval_cer": 0.9907495636188854,
"eval_loss": 6.141250133514404,
"eval_runtime": 234.2968,
"eval_samples_per_second": 23.308,
"eval_steps_per_second": 2.915,
"eval_wer": 1.0,
"step": 4000
},
{
"epoch": 7.93036750483559,
"eval_cer": 0.9907074011923535,
"eval_loss": 5.885389804840088,
"eval_runtime": 236.1436,
"eval_samples_per_second": 23.126,
"eval_steps_per_second": 2.892,
"eval_wer": 1.0,
"step": 4100
},
{
"epoch": 8.123791102514506,
"eval_cer": 0.9907495636188854,
"eval_loss": 5.770660877227783,
"eval_runtime": 234.5762,
"eval_samples_per_second": 23.28,
"eval_steps_per_second": 2.912,
"eval_wer": 1.0,
"step": 4200
},
{
"epoch": 8.317214700193423,
"eval_cer": 0.9907074011923535,
"eval_loss": 5.680200576782227,
"eval_runtime": 231.827,
"eval_samples_per_second": 23.556,
"eval_steps_per_second": 2.946,
"eval_wer": 1.0,
"step": 4300
},
{
"epoch": 8.51063829787234,
"eval_cer": 0.9907495636188854,
"eval_loss": 5.597055435180664,
"eval_runtime": 232.7784,
"eval_samples_per_second": 23.46,
"eval_steps_per_second": 2.934,
"eval_wer": 1.0,
"step": 4400
},
{
"epoch": 8.704061895551257,
"grad_norm": 1.6451424360275269,
"learning_rate": 1.07856e-05,
"loss": 5.7398,
"step": 4500
},
{
"epoch": 8.704061895551257,
"eval_cer": 0.9907495636188854,
"eval_loss": 5.533324241638184,
"eval_runtime": 232.5892,
"eval_samples_per_second": 23.479,
"eval_steps_per_second": 2.937,
"eval_wer": 1.0,
"step": 4500
},
{
"epoch": 8.897485493230175,
"eval_cer": 0.9907074011923535,
"eval_loss": 5.475062370300293,
"eval_runtime": 233.1057,
"eval_samples_per_second": 23.427,
"eval_steps_per_second": 2.93,
"eval_wer": 1.0,
"step": 4600
},
{
"epoch": 9.090909090909092,
"eval_cer": 0.9907495636188854,
"eval_loss": 5.425434589385986,
"eval_runtime": 232.7779,
"eval_samples_per_second": 23.46,
"eval_steps_per_second": 2.934,
"eval_wer": 1.0,
"step": 4700
},
{
"epoch": 9.284332688588007,
"eval_cer": 0.9908423209572558,
"eval_loss": 5.377471923828125,
"eval_runtime": 234.4379,
"eval_samples_per_second": 23.294,
"eval_steps_per_second": 2.913,
"eval_wer": 1.1319063300402488,
"step": 4800
},
{
"epoch": 9.477756286266924,
"eval_cer": 0.9907495636188854,
"eval_loss": 5.343258380889893,
"eval_runtime": 239.0036,
"eval_samples_per_second": 22.849,
"eval_steps_per_second": 2.858,
"eval_wer": 1.33205268935236,
"step": 4900
},
{
"epoch": 9.671179883945841,
"grad_norm": 2.292585611343384,
"learning_rate": 1.19856e-05,
"loss": 5.4159,
"step": 5000
},
{
"epoch": 9.671179883945841,
"eval_cer": 0.9906146438539831,
"eval_loss": 5.311874866485596,
"eval_runtime": 235.3723,
"eval_samples_per_second": 23.202,
"eval_steps_per_second": 2.902,
"eval_wer": 1.686242224661544,
"step": 5000
},
{
"epoch": 9.864603481624759,
"eval_cer": 0.9909688082368516,
"eval_loss": 5.269064426422119,
"eval_runtime": 234.5017,
"eval_samples_per_second": 23.288,
"eval_steps_per_second": 2.913,
"eval_wer": 1.4255396999634102,
"step": 5100
},
{
"epoch": 10.058027079303676,
"eval_cer": 0.9909182133250133,
"eval_loss": 5.236879825592041,
"eval_runtime": 235.3148,
"eval_samples_per_second": 23.207,
"eval_steps_per_second": 2.902,
"eval_wer": 1.4043175997072814,
"step": 5200
},
{
"epoch": 10.251450676982591,
"eval_cer": 0.9909603757515453,
"eval_loss": 5.194947719573975,
"eval_runtime": 232.4545,
"eval_samples_per_second": 23.493,
"eval_steps_per_second": 2.938,
"eval_wer": 1.5686059275521405,
"step": 5300
},
{
"epoch": 10.444874274661508,
"eval_cer": 0.9908170235013366,
"eval_loss": 5.151918888092041,
"eval_runtime": 231.5581,
"eval_samples_per_second": 23.584,
"eval_steps_per_second": 2.95,
"eval_wer": 1.5166483717526527,
"step": 5400
},
{
"epoch": 10.638297872340425,
"grad_norm": 2.0632503032684326,
"learning_rate": 1.3185600000000001e-05,
"loss": 5.2163,
"step": 5500
},
{
"epoch": 10.638297872340425,
"eval_cer": 0.9909603757515453,
"eval_loss": 5.108114242553711,
"eval_runtime": 232.6627,
"eval_samples_per_second": 23.472,
"eval_steps_per_second": 2.936,
"eval_wer": 1.247713135748262,
"step": 5500
},
{
"epoch": 10.831721470019342,
"eval_cer": 0.9907664285894981,
"eval_loss": 5.055335998535156,
"eval_runtime": 231.9668,
"eval_samples_per_second": 23.542,
"eval_steps_per_second": 2.944,
"eval_wer": 1.5124405415294548,
"step": 5600
},
{
"epoch": 11.02514506769826,
"eval_cer": 0.9908760508984813,
"eval_loss": 5.012271881103516,
"eval_runtime": 231.643,
"eval_samples_per_second": 23.575,
"eval_steps_per_second": 2.949,
"eval_wer": 1.5495792169776803,
"step": 5700
},
{
"epoch": 11.218568665377177,
"eval_cer": 0.9885824148951421,
"eval_loss": 4.942389965057373,
"eval_runtime": 232.3058,
"eval_samples_per_second": 23.508,
"eval_steps_per_second": 2.94,
"eval_wer": 1.7621661178192463,
"step": 5800
},
{
"epoch": 11.411992263056092,
"eval_cer": 0.9830675695047602,
"eval_loss": 4.8753485679626465,
"eval_runtime": 233.7911,
"eval_samples_per_second": 23.358,
"eval_steps_per_second": 2.921,
"eval_wer": 1.5404317599707282,
"step": 5900
},
{
"epoch": 11.60541586073501,
"grad_norm": 1.9146788120269775,
"learning_rate": 1.43856e-05,
"loss": 4.9465,
"step": 6000
},
{
"epoch": 11.60541586073501,
"eval_cer": 0.9749808160959279,
"eval_loss": 4.77677059173584,
"eval_runtime": 232.0203,
"eval_samples_per_second": 23.537,
"eval_steps_per_second": 2.944,
"eval_wer": 1.853457738748628,
"step": 6000
},
{
"epoch": 11.798839458413926,
"eval_cer": 0.9713126849876464,
"eval_loss": 4.6841044425964355,
"eval_runtime": 231.7273,
"eval_samples_per_second": 23.566,
"eval_steps_per_second": 2.947,
"eval_wer": 1.8395536040980607,
"step": 6100
},
{
"epoch": 11.992263056092844,
"eval_cer": 0.9697020802941251,
"eval_loss": 4.582820892333984,
"eval_runtime": 232.9124,
"eval_samples_per_second": 23.447,
"eval_steps_per_second": 2.932,
"eval_wer": 1.7444200512257593,
"step": 6200
},
{
"epoch": 12.18568665377176,
"eval_cer": 0.968909426675324,
"eval_loss": 4.485307693481445,
"eval_runtime": 232.9842,
"eval_samples_per_second": 23.439,
"eval_steps_per_second": 2.932,
"eval_wer": 1.801317233809001,
"step": 6300
},
{
"epoch": 12.379110251450676,
"eval_cer": 0.9556029648618337,
"eval_loss": 4.395504951477051,
"eval_runtime": 233.0172,
"eval_samples_per_second": 23.436,
"eval_steps_per_second": 2.931,
"eval_wer": 1.827844859129162,
"step": 6400
},
{
"epoch": 12.572533849129593,
"grad_norm": 4.459765434265137,
"learning_rate": 1.55856e-05,
"loss": 4.5094,
"step": 6500
},
{
"epoch": 12.572533849129593,
"eval_cer": 0.9123021528134987,
"eval_loss": 4.284241676330566,
"eval_runtime": 230.5488,
"eval_samples_per_second": 23.687,
"eval_steps_per_second": 2.962,
"eval_wer": 1.8728503476033662,
"step": 6500
},
{
"epoch": 12.76595744680851,
"eval_cer": 0.8650380726711584,
"eval_loss": 4.181938171386719,
"eval_runtime": 232.0403,
"eval_samples_per_second": 23.535,
"eval_steps_per_second": 2.943,
"eval_wer": 1.9094401756311745,
"step": 6600
},
{
"epoch": 12.959381044487428,
"eval_cer": 0.848628456264915,
"eval_loss": 4.074057579040527,
"eval_runtime": 231.4468,
"eval_samples_per_second": 23.595,
"eval_steps_per_second": 2.951,
"eval_wer": 1.9134650567142335,
"step": 6700
},
{
"epoch": 13.152804642166345,
"eval_cer": 0.8385937987503057,
"eval_loss": 3.9648523330688477,
"eval_runtime": 231.3067,
"eval_samples_per_second": 23.609,
"eval_steps_per_second": 2.953,
"eval_wer": 1.9191364800585438,
"step": 6800
},
{
"epoch": 13.346228239845262,
"eval_cer": 0.8189292430157941,
"eval_loss": 3.8640658855438232,
"eval_runtime": 237.0711,
"eval_samples_per_second": 23.035,
"eval_steps_per_second": 2.881,
"eval_wer": 1.9195023783388219,
"step": 6900
},
{
"epoch": 13.539651837524177,
"grad_norm": 4.586193084716797,
"learning_rate": 1.67856e-05,
"loss": 4.0097,
"step": 7000
},
{
"epoch": 13.539651837524177,
"eval_cer": 0.8014487009756386,
"eval_loss": 3.7686750888824463,
"eval_runtime": 244.328,
"eval_samples_per_second": 22.351,
"eval_steps_per_second": 2.795,
"eval_wer": 1.9275521405049396,
"step": 7000
},
{
"epoch": 13.733075435203094,
"eval_cer": 0.7963301823946571,
"eval_loss": 3.680776596069336,
"eval_runtime": 230.3109,
"eval_samples_per_second": 23.711,
"eval_steps_per_second": 2.966,
"eval_wer": 1.9259055982436883,
"step": 7100
},
{
"epoch": 13.926499032882012,
"eval_cer": 0.779220669707983,
"eval_loss": 3.60208797454834,
"eval_runtime": 229.5487,
"eval_samples_per_second": 23.79,
"eval_steps_per_second": 2.975,
"eval_wer": 1.9275521405049396,
"step": 7200
},
{
"epoch": 14.119922630560929,
"eval_cer": 0.777466712764253,
"eval_loss": 3.55332088470459,
"eval_runtime": 231.4776,
"eval_samples_per_second": 23.592,
"eval_steps_per_second": 2.951,
"eval_wer": 1.9366995975118917,
"step": 7300
},
{
"epoch": 14.313346228239846,
"eval_cer": 0.775071886937237,
"eval_loss": 3.476841449737549,
"eval_runtime": 240.5885,
"eval_samples_per_second": 22.699,
"eval_steps_per_second": 2.839,
"eval_wer": 1.9321258690084158,
"step": 7400
},
{
"epoch": 14.506769825918763,
"grad_norm": 4.310749053955078,
"learning_rate": 1.7985600000000003e-05,
"loss": 3.5619,
"step": 7500
},
{
"epoch": 14.506769825918763,
"eval_cer": 0.7672043781463711,
"eval_loss": 3.4284844398498535,
"eval_runtime": 240.9512,
"eval_samples_per_second": 22.664,
"eval_steps_per_second": 2.835,
"eval_wer": 1.938529088913282,
"step": 7500
},
{
"epoch": 14.700193423597678,
"eval_cer": 0.7659816677769439,
"eval_loss": 3.362793445587158,
"eval_runtime": 230.1401,
"eval_samples_per_second": 23.729,
"eval_steps_per_second": 2.968,
"eval_wer": 1.9361507500914745,
"step": 7600
},
{
"epoch": 14.893617021276595,
"eval_cer": 0.7618919124033426,
"eval_loss": 3.2909657955169678,
"eval_runtime": 233.6336,
"eval_samples_per_second": 23.374,
"eval_steps_per_second": 2.923,
"eval_wer": 1.9313940724478595,
"step": 7700
},
{
"epoch": 15.087040618955513,
"eval_cer": 0.7486360455016907,
"eval_loss": 3.2242627143859863,
"eval_runtime": 241.9894,
"eval_samples_per_second": 22.567,
"eval_steps_per_second": 2.822,
"eval_wer": 1.928832784485913,
"step": 7800
},
{
"epoch": 15.28046421663443,
"eval_cer": 0.7431802275084536,
"eval_loss": 3.164518117904663,
"eval_runtime": 232.2322,
"eval_samples_per_second": 23.515,
"eval_steps_per_second": 2.941,
"eval_wer": 1.9308452250274424,
"step": 7900
},
{
"epoch": 15.473887814313347,
"grad_norm": 5.67767333984375,
"learning_rate": 1.91856e-05,
"loss": 3.2379,
"step": 8000
},
{
"epoch": 15.473887814313347,
"eval_cer": 0.7382556560895193,
"eval_loss": 3.1185944080352783,
"eval_runtime": 235.8886,
"eval_samples_per_second": 23.151,
"eval_steps_per_second": 2.895,
"eval_wer": 1.9332235638492499,
"step": 8000
},
{
"epoch": 15.667311411992262,
"eval_cer": 0.7374714349560246,
"eval_loss": 3.078275203704834,
"eval_runtime": 234.7574,
"eval_samples_per_second": 23.262,
"eval_steps_per_second": 2.909,
"eval_wer": 1.9348701061105014,
"step": 8100
},
{
"epoch": 15.86073500967118,
"eval_cer": 0.727942726559799,
"eval_loss": 3.0145950317382812,
"eval_runtime": 231.4339,
"eval_samples_per_second": 23.596,
"eval_steps_per_second": 2.951,
"eval_wer": 1.9321258690084158,
"step": 8200
},
{
"epoch": 16.054158607350097,
"eval_cer": 0.7300086854598656,
"eval_loss": 2.9523487091064453,
"eval_runtime": 235.1315,
"eval_samples_per_second": 23.225,
"eval_steps_per_second": 2.905,
"eval_wer": 1.9308452250274424,
"step": 8300
},
{
"epoch": 16.247582205029012,
"eval_cer": 0.7254467109091063,
"eval_loss": 2.918687105178833,
"eval_runtime": 238.3374,
"eval_samples_per_second": 22.913,
"eval_steps_per_second": 2.866,
"eval_wer": 1.9273691913648006,
"step": 8400
},
{
"epoch": 16.44100580270793,
"grad_norm": 8.755329132080078,
"learning_rate": 2.03856e-05,
"loss": 2.9448,
"step": 8500
},
{
"epoch": 16.44100580270793,
"eval_cer": 0.7177394193390618,
"eval_loss": 2.8671371936798096,
"eval_runtime": 229.0989,
"eval_samples_per_second": 23.837,
"eval_steps_per_second": 2.981,
"eval_wer": 1.929015733626052,
"step": 8500
},
{
"epoch": 16.634429400386846,
"eval_cer": 0.7115752725800876,
"eval_loss": 2.8188540935516357,
"eval_runtime": 230.6192,
"eval_samples_per_second": 23.68,
"eval_steps_per_second": 2.962,
"eval_wer": 1.9348701061105014,
"step": 8600
},
{
"epoch": 16.827852998065765,
"eval_cer": 0.70778065419221,
"eval_loss": 2.76908802986145,
"eval_runtime": 239.0894,
"eval_samples_per_second": 22.841,
"eval_steps_per_second": 2.857,
"eval_wer": 1.9365166483717526,
"step": 8700
},
{
"epoch": 17.02127659574468,
"eval_cer": 0.7069205406909579,
"eval_loss": 2.731661081314087,
"eval_runtime": 231.1522,
"eval_samples_per_second": 23.625,
"eval_steps_per_second": 2.955,
"eval_wer": 1.9420051225759238,
"step": 8800
},
{
"epoch": 17.214700193423596,
"eval_cer": 0.7056388029243859,
"eval_loss": 2.683185577392578,
"eval_runtime": 229.2327,
"eval_samples_per_second": 23.823,
"eval_steps_per_second": 2.98,
"eval_wer": 1.9489571899012075,
"step": 8900
},
{
"epoch": 17.408123791102515,
"grad_norm": 4.358935832977295,
"learning_rate": 2.1585600000000002e-05,
"loss": 2.6749,
"step": 9000
},
{
"epoch": 17.408123791102515,
"eval_cer": 0.7020128342426364,
"eval_loss": 2.6420364379882812,
"eval_runtime": 229.374,
"eval_samples_per_second": 23.808,
"eval_steps_per_second": 2.978,
"eval_wer": 1.978412001463593,
"step": 9000
},
{
"epoch": 17.60154738878143,
"eval_cer": 0.6991204917825431,
"eval_loss": 2.601982831954956,
"eval_runtime": 229.7146,
"eval_samples_per_second": 23.773,
"eval_steps_per_second": 2.973,
"eval_wer": 1.9414562751555067,
"step": 9100
},
{
"epoch": 17.79497098646035,
"eval_cer": 0.6994577911947989,
"eval_loss": 2.5666821002960205,
"eval_runtime": 234.5424,
"eval_samples_per_second": 23.284,
"eval_steps_per_second": 2.912,
"eval_wer": 1.9762166117819246,
"step": 9200
},
{
"epoch": 17.988394584139265,
"eval_cer": 0.6771285701034666,
"eval_loss": 2.517096757888794,
"eval_runtime": 247.5848,
"eval_samples_per_second": 22.057,
"eval_steps_per_second": 2.759,
"eval_wer": 1.9857299670691548,
"step": 9300
},
{
"epoch": 18.181818181818183,
"eval_cer": 0.6774658695157224,
"eval_loss": 2.492238759994507,
"eval_runtime": 229.5938,
"eval_samples_per_second": 23.785,
"eval_steps_per_second": 2.975,
"eval_wer": 1.9890230515916576,
"step": 9400
},
{
"epoch": 18.3752417794971,
"grad_norm": 6.681089878082275,
"learning_rate": 2.27856e-05,
"loss": 2.4473,
"step": 9500
},
{
"epoch": 18.3752417794971,
"eval_cer": 0.6682575955611397,
"eval_loss": 2.445500373840332,
"eval_runtime": 229.8314,
"eval_samples_per_second": 23.761,
"eval_steps_per_second": 2.972,
"eval_wer": 1.9882912550311014,
"step": 9500
},
{
"epoch": 18.568665377176014,
"eval_cer": 0.6620681513462463,
"eval_loss": 2.4191782474517822,
"eval_runtime": 230.0128,
"eval_samples_per_second": 23.742,
"eval_steps_per_second": 2.969,
"eval_wer": 1.9815221368459568,
"step": 9600
},
{
"epoch": 18.762088974854933,
"eval_cer": 0.6523454957879736,
"eval_loss": 2.386597156524658,
"eval_runtime": 240.9084,
"eval_samples_per_second": 22.668,
"eval_steps_per_second": 2.835,
"eval_wer": 1.9904866447127698,
"step": 9700
},
{
"epoch": 18.95551257253385,
"eval_cer": 0.6539392355108822,
"eval_loss": 2.335400342941284,
"eval_runtime": 233.6249,
"eval_samples_per_second": 23.375,
"eval_steps_per_second": 2.923,
"eval_wer": 1.991401390413465,
"step": 9800
},
{
"epoch": 19.148936170212767,
"eval_cer": 0.6515612746544789,
"eval_loss": 2.3113534450531006,
"eval_runtime": 252.8489,
"eval_samples_per_second": 21.598,
"eval_steps_per_second": 2.701,
"eval_wer": 1.9924990852542992,
"step": 9900
},
{
"epoch": 19.342359767891683,
"grad_norm": 8.786458015441895,
"learning_rate": 2.39856e-05,
"loss": 2.2307,
"step": 10000
},
{
"epoch": 19.342359767891683,
"eval_cer": 0.645447722807343,
"eval_loss": 2.269487142562866,
"eval_runtime": 236.1588,
"eval_samples_per_second": 23.124,
"eval_steps_per_second": 2.892,
"eval_wer": 1.9903036955726308,
"step": 10000
},
{
"epoch": 19.535783365570598,
"eval_cer": 0.6464258911028847,
"eval_loss": 2.246647834777832,
"eval_runtime": 241.6204,
"eval_samples_per_second": 22.602,
"eval_steps_per_second": 2.827,
"eval_wer": 1.9924990852542992,
"step": 10100
},
{
"epoch": 19.729206963249517,
"eval_cer": 0.6422939733027515,
"eval_loss": 2.2167210578918457,
"eval_runtime": 230.6902,
"eval_samples_per_second": 23.672,
"eval_steps_per_second": 2.961,
"eval_wer": 1.9928649835345773,
"step": 10200
},
{
"epoch": 19.922630560928432,
"eval_cer": 0.641256777610065,
"eval_loss": 2.176206588745117,
"eval_runtime": 236.5184,
"eval_samples_per_second": 23.089,
"eval_steps_per_second": 2.888,
"eval_wer": 1.991401390413465,
"step": 10300
},
{
"epoch": 20.0,
"step": 10340,
"total_flos": 1.4857862206321902e+19,
"train_loss": 15.407913101757057,
"train_runtime": 65838.1292,
"train_samples_per_second": 5.022,
"train_steps_per_second": 0.157
}
],
"logging_steps": 500,
"max_steps": 10340,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.4857862206321902e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}