|
{ |
|
"best_metric": 0.7032522559165955, |
|
"best_model_checkpoint": "./Hubert-noisy_common_voice_debug/checkpoint-3700", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 11280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"eval_cer": 1.0538678168650442, |
|
"eval_loss": 12.082180976867676, |
|
"eval_runtime": 151.0989, |
|
"eval_samples_per_second": 32.833, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.1570247933884297, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"eval_cer": 0.9816708830000044, |
|
"eval_loss": 5.878890037536621, |
|
"eval_runtime": 151.597, |
|
"eval_samples_per_second": 32.725, |
|
"eval_steps_per_second": 4.096, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"eval_cer": 0.9816665437204115, |
|
"eval_loss": 5.362728595733643, |
|
"eval_runtime": 149.1154, |
|
"eval_samples_per_second": 33.27, |
|
"eval_steps_per_second": 4.165, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"eval_cer": 0.9816752222795971, |
|
"eval_loss": 4.9316487312316895, |
|
"eval_runtime": 149.2108, |
|
"eval_samples_per_second": 33.248, |
|
"eval_steps_per_second": 4.162, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 18.269367218017578, |
|
"learning_rate": 1.1927999999999998e-05, |
|
"loss": 6.372, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_cer": 0.9816752222795971, |
|
"eval_loss": 4.455592632293701, |
|
"eval_runtime": 149.4229, |
|
"eval_samples_per_second": 33.201, |
|
"eval_steps_per_second": 4.156, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"eval_cer": 0.9816752222795971, |
|
"eval_loss": 3.989032506942749, |
|
"eval_runtime": 148.7951, |
|
"eval_samples_per_second": 33.341, |
|
"eval_steps_per_second": 4.174, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"eval_cer": 0.9816622044408188, |
|
"eval_loss": 3.57344126701355, |
|
"eval_runtime": 148.5583, |
|
"eval_samples_per_second": 33.394, |
|
"eval_steps_per_second": 4.18, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"eval_cer": 0.9816708830000044, |
|
"eval_loss": 3.2931714057922363, |
|
"eval_runtime": 148.3742, |
|
"eval_samples_per_second": 33.436, |
|
"eval_steps_per_second": 4.185, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"eval_cer": 0.9816839008387828, |
|
"eval_loss": 3.1536319255828857, |
|
"eval_runtime": 148.8499, |
|
"eval_samples_per_second": 33.329, |
|
"eval_steps_per_second": 4.172, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 1.548904299736023, |
|
"learning_rate": 2.3928e-05, |
|
"loss": 3.4101, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_cer": 0.9816622044408188, |
|
"eval_loss": 3.0483851432800293, |
|
"eval_runtime": 148.9657, |
|
"eval_samples_per_second": 33.303, |
|
"eval_steps_per_second": 4.169, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"eval_cer": 0.9807552950059231, |
|
"eval_loss": 2.8469536304473877, |
|
"eval_runtime": 148.996, |
|
"eval_samples_per_second": 33.296, |
|
"eval_steps_per_second": 4.168, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"eval_cer": 0.8702078080996993, |
|
"eval_loss": 2.521139144897461, |
|
"eval_runtime": 149.4933, |
|
"eval_samples_per_second": 33.185, |
|
"eval_steps_per_second": 4.154, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"eval_cer": 0.5518348643758163, |
|
"eval_loss": 2.035446882247925, |
|
"eval_runtime": 150.325, |
|
"eval_samples_per_second": 33.002, |
|
"eval_steps_per_second": 4.131, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"eval_cer": 0.44136548450226293, |
|
"eval_loss": 1.678019404411316, |
|
"eval_runtime": 150.3487, |
|
"eval_samples_per_second": 32.997, |
|
"eval_steps_per_second": 4.13, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 4.6614580154418945, |
|
"learning_rate": 3.5928e-05, |
|
"loss": 2.3222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_cer": 0.43124194521225584, |
|
"eval_loss": 1.5038928985595703, |
|
"eval_runtime": 150.8122, |
|
"eval_samples_per_second": 32.895, |
|
"eval_steps_per_second": 4.118, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"eval_cer": 0.396458279996355, |
|
"eval_loss": 1.3418982028961182, |
|
"eval_runtime": 150.4572, |
|
"eval_samples_per_second": 32.973, |
|
"eval_steps_per_second": 4.127, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.5212765957446805, |
|
"eval_cer": 0.3685610514942309, |
|
"eval_loss": 1.2053722143173218, |
|
"eval_runtime": 150.9364, |
|
"eval_samples_per_second": 32.868, |
|
"eval_steps_per_second": 4.114, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"eval_cer": 0.3321067636351013, |
|
"eval_loss": 1.0588372945785522, |
|
"eval_runtime": 150.9127, |
|
"eval_samples_per_second": 32.873, |
|
"eval_steps_per_second": 4.115, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.053191489361702, |
|
"eval_cer": 0.31576069740901613, |
|
"eval_loss": 0.9545913338661194, |
|
"eval_runtime": 150.8858, |
|
"eval_samples_per_second": 32.879, |
|
"eval_steps_per_second": 4.116, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 4.519205570220947, |
|
"learning_rate": 4.7928e-05, |
|
"loss": 1.2343, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"eval_cer": 0.3105969546935818, |
|
"eval_loss": 0.9041823148727417, |
|
"eval_runtime": 150.7252, |
|
"eval_samples_per_second": 32.914, |
|
"eval_steps_per_second": 4.12, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.585106382978723, |
|
"eval_cer": 0.30879615366256896, |
|
"eval_loss": 0.8747320771217346, |
|
"eval_runtime": 151.661, |
|
"eval_samples_per_second": 32.711, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"eval_cer": 0.2971885807518236, |
|
"eval_loss": 0.8223557472229004, |
|
"eval_runtime": 155.0175, |
|
"eval_samples_per_second": 32.003, |
|
"eval_steps_per_second": 4.006, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.117021276595745, |
|
"eval_cer": 0.2995751845278647, |
|
"eval_loss": 0.8100646138191223, |
|
"eval_runtime": 151.2405, |
|
"eval_samples_per_second": 32.802, |
|
"eval_steps_per_second": 4.106, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"eval_cer": 0.2969933131701475, |
|
"eval_loss": 0.7891868948936462, |
|
"eval_runtime": 151.3984, |
|
"eval_samples_per_second": 32.768, |
|
"eval_steps_per_second": 4.102, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"grad_norm": 7.927753925323486, |
|
"learning_rate": 5.9903999999999994e-05, |
|
"loss": 0.8716, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.648936170212766, |
|
"eval_cer": 0.29149544592606735, |
|
"eval_loss": 0.7661293148994446, |
|
"eval_runtime": 152.5094, |
|
"eval_samples_per_second": 32.529, |
|
"eval_steps_per_second": 4.072, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"eval_cer": 0.28864019995400364, |
|
"eval_loss": 0.7654036283493042, |
|
"eval_runtime": 151.4659, |
|
"eval_samples_per_second": 32.753, |
|
"eval_steps_per_second": 4.1, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.180851063829787, |
|
"eval_cer": 0.28976841264813213, |
|
"eval_loss": 0.767704963684082, |
|
"eval_runtime": 151.3213, |
|
"eval_samples_per_second": 32.785, |
|
"eval_steps_per_second": 4.104, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"eval_cer": 0.28610606067180727, |
|
"eval_loss": 0.752834141254425, |
|
"eval_runtime": 151.5246, |
|
"eval_samples_per_second": 32.741, |
|
"eval_steps_per_second": 4.098, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.712765957446808, |
|
"eval_cer": 0.28800232585386176, |
|
"eval_loss": 0.7432949542999268, |
|
"eval_runtime": 155.4404, |
|
"eval_samples_per_second": 31.916, |
|
"eval_steps_per_second": 3.995, |
|
"eval_wer": 1.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 6.501748085021973, |
|
"learning_rate": 7.1904e-05, |
|
"loss": 0.7324, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"eval_cer": 0.2876595227660304, |
|
"eval_loss": 0.7498476505279541, |
|
"eval_runtime": 151.1874, |
|
"eval_samples_per_second": 32.814, |
|
"eval_steps_per_second": 4.107, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.24468085106383, |
|
"eval_cer": 0.2827474582669785, |
|
"eval_loss": 0.7267456650733948, |
|
"eval_runtime": 151.7111, |
|
"eval_samples_per_second": 32.7, |
|
"eval_steps_per_second": 4.093, |
|
"eval_wer": 1.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"eval_cer": 0.28125474608705464, |
|
"eval_loss": 0.7319472432136536, |
|
"eval_runtime": 151.6994, |
|
"eval_samples_per_second": 32.703, |
|
"eval_steps_per_second": 4.094, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 8.77659574468085, |
|
"eval_cer": 0.28822796839268744, |
|
"eval_loss": 0.7477809190750122, |
|
"eval_runtime": 151.7004, |
|
"eval_samples_per_second": 32.703, |
|
"eval_steps_per_second": 4.094, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"eval_cer": 0.2814543529483235, |
|
"eval_loss": 0.7336912751197815, |
|
"eval_runtime": 151.8383, |
|
"eval_samples_per_second": 32.673, |
|
"eval_steps_per_second": 4.09, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"grad_norm": 6.825859546661377, |
|
"learning_rate": 8.390399999999999e-05, |
|
"loss": 0.6486, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.308510638297872, |
|
"eval_cer": 0.28514274060220524, |
|
"eval_loss": 0.734130322933197, |
|
"eval_runtime": 151.4163, |
|
"eval_samples_per_second": 32.764, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"eval_cer": 0.2803218009746022, |
|
"eval_loss": 0.74192875623703, |
|
"eval_runtime": 151.3965, |
|
"eval_samples_per_second": 32.768, |
|
"eval_steps_per_second": 4.102, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.840425531914894, |
|
"eval_cer": 0.277323358775976, |
|
"eval_loss": 0.7032522559165955, |
|
"eval_runtime": 151.1826, |
|
"eval_samples_per_second": 32.815, |
|
"eval_steps_per_second": 4.108, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 10.106382978723405, |
|
"eval_cer": 0.2829253687302834, |
|
"eval_loss": 0.7326785922050476, |
|
"eval_runtime": 151.1178, |
|
"eval_samples_per_second": 32.829, |
|
"eval_steps_per_second": 4.109, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.372340425531915, |
|
"eval_cer": 0.2855332757655574, |
|
"eval_loss": 0.7554399967193604, |
|
"eval_runtime": 151.3763, |
|
"eval_samples_per_second": 32.773, |
|
"eval_steps_per_second": 4.102, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"grad_norm": 5.65977144241333, |
|
"learning_rate": 9.5904e-05, |
|
"loss": 0.6034, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.638297872340425, |
|
"eval_cer": 0.2841360277366752, |
|
"eval_loss": 0.7361173033714294, |
|
"eval_runtime": 151.2103, |
|
"eval_samples_per_second": 32.809, |
|
"eval_steps_per_second": 4.107, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.904255319148936, |
|
"eval_cer": 0.2833202431732284, |
|
"eval_loss": 0.745939314365387, |
|
"eval_runtime": 151.41, |
|
"eval_samples_per_second": 32.765, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 11.170212765957446, |
|
"eval_cer": 0.2801352119521117, |
|
"eval_loss": 0.7383919358253479, |
|
"eval_runtime": 151.4933, |
|
"eval_samples_per_second": 32.747, |
|
"eval_steps_per_second": 4.099, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 11.436170212765958, |
|
"eval_cer": 0.27755767987398733, |
|
"eval_loss": 0.7336695194244385, |
|
"eval_runtime": 151.6319, |
|
"eval_samples_per_second": 32.717, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 11.702127659574469, |
|
"eval_cer": 0.2819099773055677, |
|
"eval_loss": 0.7572408318519592, |
|
"eval_runtime": 151.2617, |
|
"eval_samples_per_second": 32.797, |
|
"eval_steps_per_second": 4.105, |
|
"eval_wer": 1.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"grad_norm": 5.250299453735352, |
|
"learning_rate": 0.00010790399999999999, |
|
"loss": 0.5687, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 11.96808510638298, |
|
"eval_cer": 0.2824393694158896, |
|
"eval_loss": 0.7522115707397461, |
|
"eval_runtime": 151.5116, |
|
"eval_samples_per_second": 32.743, |
|
"eval_steps_per_second": 4.099, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.23404255319149, |
|
"eval_cer": 0.27894624934368395, |
|
"eval_loss": 0.7491241097450256, |
|
"eval_runtime": 151.5087, |
|
"eval_samples_per_second": 32.744, |
|
"eval_steps_per_second": 4.099, |
|
"eval_wer": 1.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_cer": 0.2831987433446299, |
|
"eval_loss": 0.7484813928604126, |
|
"eval_runtime": 151.4358, |
|
"eval_samples_per_second": 32.76, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 1.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 12.76595744680851, |
|
"eval_cer": 0.2849040802246011, |
|
"eval_loss": 0.7622763514518738, |
|
"eval_runtime": 151.3314, |
|
"eval_samples_per_second": 32.782, |
|
"eval_steps_per_second": 4.104, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.03191489361702, |
|
"eval_cer": 0.285936828767688, |
|
"eval_loss": 0.7829335331916809, |
|
"eval_runtime": 151.6153, |
|
"eval_samples_per_second": 32.721, |
|
"eval_steps_per_second": 4.096, |
|
"eval_wer": 1.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"grad_norm": 4.461331844329834, |
|
"learning_rate": 0.00011990399999999998, |
|
"loss": 0.5255, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.297872340425531, |
|
"eval_cer": 0.28200110217701657, |
|
"eval_loss": 0.7818682789802551, |
|
"eval_runtime": 151.2326, |
|
"eval_samples_per_second": 32.804, |
|
"eval_steps_per_second": 4.106, |
|
"eval_wer": 1.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.563829787234042, |
|
"eval_cer": 0.2824393694158896, |
|
"eval_loss": 0.7782630324363708, |
|
"eval_runtime": 151.6654, |
|
"eval_samples_per_second": 32.71, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 13.829787234042554, |
|
"eval_cer": 0.2840275457468551, |
|
"eval_loss": 0.7653459906578064, |
|
"eval_runtime": 151.0873, |
|
"eval_samples_per_second": 32.835, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.095744680851064, |
|
"eval_cer": 0.28220938759747105, |
|
"eval_loss": 0.7816305756568909, |
|
"eval_runtime": 151.1619, |
|
"eval_samples_per_second": 32.819, |
|
"eval_steps_per_second": 4.108, |
|
"eval_wer": 1.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 14.361702127659575, |
|
"eval_cer": 0.2824133337383328, |
|
"eval_loss": 0.7607858180999756, |
|
"eval_runtime": 150.8484, |
|
"eval_samples_per_second": 32.887, |
|
"eval_steps_per_second": 4.117, |
|
"eval_wer": 1.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"grad_norm": 4.656615257263184, |
|
"learning_rate": 0.000131904, |
|
"loss": 0.5016, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.627659574468085, |
|
"eval_cer": 0.28411433133871117, |
|
"eval_loss": 0.7712321281433105, |
|
"eval_runtime": 151.045, |
|
"eval_samples_per_second": 32.845, |
|
"eval_steps_per_second": 4.111, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.893617021276595, |
|
"eval_cer": 0.286396792404525, |
|
"eval_loss": 0.7712020874023438, |
|
"eval_runtime": 151.0967, |
|
"eval_samples_per_second": 32.833, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.159574468085106, |
|
"eval_cer": 0.285060294289942, |
|
"eval_loss": 0.8153163194656372, |
|
"eval_runtime": 151.4841, |
|
"eval_samples_per_second": 32.749, |
|
"eval_steps_per_second": 4.099, |
|
"eval_wer": 0.999596855472687, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 15.425531914893616, |
|
"eval_cer": 0.2852338654736541, |
|
"eval_loss": 0.8161126375198364, |
|
"eval_runtime": 150.9671, |
|
"eval_samples_per_second": 32.861, |
|
"eval_steps_per_second": 4.113, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 15.691489361702128, |
|
"eval_cer": 0.28826702190902265, |
|
"eval_loss": 0.7910758852958679, |
|
"eval_runtime": 151.0378, |
|
"eval_samples_per_second": 32.846, |
|
"eval_steps_per_second": 4.112, |
|
"eval_wer": 1.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"grad_norm": 5.0891499519348145, |
|
"learning_rate": 0.00014390399999999998, |
|
"loss": 0.4821, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 15.957446808510639, |
|
"eval_cer": 0.28232654814647673, |
|
"eval_loss": 0.7926108241081238, |
|
"eval_runtime": 151.2593, |
|
"eval_samples_per_second": 32.798, |
|
"eval_steps_per_second": 4.106, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.22340425531915, |
|
"eval_cer": 0.28674393477194915, |
|
"eval_loss": 0.814721405506134, |
|
"eval_runtime": 150.8029, |
|
"eval_samples_per_second": 32.897, |
|
"eval_steps_per_second": 4.118, |
|
"eval_wer": 1.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 16.48936170212766, |
|
"eval_cer": 0.28262161915878725, |
|
"eval_loss": 0.7699744701385498, |
|
"eval_runtime": 150.7429, |
|
"eval_samples_per_second": 32.91, |
|
"eval_steps_per_second": 4.12, |
|
"eval_wer": 1.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 16.75531914893617, |
|
"eval_cer": 0.2909530359769671, |
|
"eval_loss": 0.8119201064109802, |
|
"eval_runtime": 150.6966, |
|
"eval_samples_per_second": 32.92, |
|
"eval_steps_per_second": 4.121, |
|
"eval_wer": 1.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 17.02127659574468, |
|
"eval_cer": 0.2845873128143266, |
|
"eval_loss": 0.835509181022644, |
|
"eval_runtime": 151.1321, |
|
"eval_samples_per_second": 32.826, |
|
"eval_steps_per_second": 4.109, |
|
"eval_wer": 1.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"grad_norm": 4.005585670471191, |
|
"learning_rate": 0.00015587999999999998, |
|
"loss": 0.4503, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.28723404255319, |
|
"eval_cer": 0.28592381092890956, |
|
"eval_loss": 0.793612539768219, |
|
"eval_runtime": 150.9291, |
|
"eval_samples_per_second": 32.87, |
|
"eval_steps_per_second": 4.115, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.5531914893617, |
|
"eval_cer": 0.29515779790239227, |
|
"eval_loss": 0.7976419925689697, |
|
"eval_runtime": 169.2668, |
|
"eval_samples_per_second": 29.309, |
|
"eval_steps_per_second": 3.669, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 17.819148936170212, |
|
"eval_cer": 0.2901546085318915, |
|
"eval_loss": 0.8274039626121521, |
|
"eval_runtime": 158.3816, |
|
"eval_samples_per_second": 31.323, |
|
"eval_steps_per_second": 3.921, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 18.085106382978722, |
|
"eval_cer": 0.2885490750825548, |
|
"eval_loss": 0.903409481048584, |
|
"eval_runtime": 151.19, |
|
"eval_samples_per_second": 32.813, |
|
"eval_steps_per_second": 4.107, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 18.351063829787233, |
|
"eval_cer": 0.28819325415594504, |
|
"eval_loss": 0.8065901398658752, |
|
"eval_runtime": 152.3883, |
|
"eval_samples_per_second": 32.555, |
|
"eval_steps_per_second": 4.075, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"grad_norm": 4.851210117340088, |
|
"learning_rate": 0.000167856, |
|
"loss": 0.4435, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.617021276595743, |
|
"eval_cer": 0.29208124867109564, |
|
"eval_loss": 0.849502682685852, |
|
"eval_runtime": 171.4215, |
|
"eval_samples_per_second": 28.94, |
|
"eval_steps_per_second": 3.623, |
|
"eval_wer": 1.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.882978723404257, |
|
"eval_cer": 0.2896121985827913, |
|
"eval_loss": 0.8448098301887512, |
|
"eval_runtime": 151.0183, |
|
"eval_samples_per_second": 32.85, |
|
"eval_steps_per_second": 4.112, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 19.148936170212767, |
|
"eval_cer": 0.29039760818908844, |
|
"eval_loss": 0.877412736415863, |
|
"eval_runtime": 155.1268, |
|
"eval_samples_per_second": 31.98, |
|
"eval_steps_per_second": 4.003, |
|
"eval_wer": 1.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 19.414893617021278, |
|
"eval_cer": 0.29732309841920046, |
|
"eval_loss": 0.8293471336364746, |
|
"eval_runtime": 151.2836, |
|
"eval_samples_per_second": 32.793, |
|
"eval_steps_per_second": 4.105, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 19.680851063829788, |
|
"eval_cer": 0.2924804623936334, |
|
"eval_loss": 0.8037762641906738, |
|
"eval_runtime": 151.1502, |
|
"eval_samples_per_second": 32.822, |
|
"eval_steps_per_second": 4.108, |
|
"eval_wer": 1.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"grad_norm": 3.5915985107421875, |
|
"learning_rate": 0.000179856, |
|
"loss": 0.4457, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.9468085106383, |
|
"eval_cer": 0.29077512551366225, |
|
"eval_loss": 0.806207537651062, |
|
"eval_runtime": 154.4071, |
|
"eval_samples_per_second": 32.129, |
|
"eval_steps_per_second": 4.022, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.21276595744681, |
|
"eval_cer": 0.29176448126082105, |
|
"eval_loss": 0.8740444779396057, |
|
"eval_runtime": 154.5779, |
|
"eval_samples_per_second": 32.094, |
|
"eval_steps_per_second": 4.017, |
|
"eval_wer": 1.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 20.47872340425532, |
|
"eval_cer": 0.2977179728621454, |
|
"eval_loss": 0.848852276802063, |
|
"eval_runtime": 173.0214, |
|
"eval_samples_per_second": 28.673, |
|
"eval_steps_per_second": 3.589, |
|
"eval_wer": 1.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 20.74468085106383, |
|
"eval_cer": 0.29728404490286525, |
|
"eval_loss": 0.8606219291687012, |
|
"eval_runtime": 151.0098, |
|
"eval_samples_per_second": 32.852, |
|
"eval_steps_per_second": 4.112, |
|
"eval_wer": 1.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 21.01063829787234, |
|
"eval_cer": 0.29262799789978866, |
|
"eval_loss": 0.8141107559204102, |
|
"eval_runtime": 155.5633, |
|
"eval_samples_per_second": 31.891, |
|
"eval_steps_per_second": 3.992, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"grad_norm": 5.225797176361084, |
|
"learning_rate": 0.000191856, |
|
"loss": 0.4252, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.27659574468085, |
|
"eval_cer": 0.29841225759699375, |
|
"eval_loss": 0.8832473158836365, |
|
"eval_runtime": 161.5174, |
|
"eval_samples_per_second": 30.715, |
|
"eval_steps_per_second": 3.845, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.54255319148936, |
|
"eval_cer": 0.29451124524306477, |
|
"eval_loss": 0.8589781522750854, |
|
"eval_runtime": 150.7991, |
|
"eval_samples_per_second": 32.898, |
|
"eval_steps_per_second": 4.118, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 21.80851063829787, |
|
"eval_cer": 0.2939644960143717, |
|
"eval_loss": 0.8304488062858582, |
|
"eval_runtime": 156.9786, |
|
"eval_samples_per_second": 31.603, |
|
"eval_steps_per_second": 3.956, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 22.074468085106382, |
|
"eval_cer": 0.29739686617227806, |
|
"eval_loss": 0.873356819152832, |
|
"eval_runtime": 151.6885, |
|
"eval_samples_per_second": 32.705, |
|
"eval_steps_per_second": 4.094, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 22.340425531914892, |
|
"eval_cer": 0.292992497385584, |
|
"eval_loss": 0.8416581153869629, |
|
"eval_runtime": 171.0572, |
|
"eval_samples_per_second": 29.002, |
|
"eval_steps_per_second": 3.63, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"grad_norm": 6.305420875549316, |
|
"learning_rate": 0.00020385599999999998, |
|
"loss": 0.418, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.606382978723403, |
|
"eval_cer": 0.2993235063114822, |
|
"eval_loss": 0.9387206435203552, |
|
"eval_runtime": 154.7978, |
|
"eval_samples_per_second": 32.048, |
|
"eval_steps_per_second": 4.012, |
|
"eval_wer": 1.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.872340425531917, |
|
"eval_cer": 0.2996055594850143, |
|
"eval_loss": 0.8810231685638428, |
|
"eval_runtime": 154.5989, |
|
"eval_samples_per_second": 32.089, |
|
"eval_steps_per_second": 4.017, |
|
"eval_wer": 1.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 23.138297872340427, |
|
"eval_cer": 0.3074119234724651, |
|
"eval_loss": 0.9089604020118713, |
|
"eval_runtime": 151.1053, |
|
"eval_samples_per_second": 32.831, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 23.404255319148938, |
|
"eval_cer": 0.31074015092014423, |
|
"eval_loss": 0.8992817997932434, |
|
"eval_runtime": 154.115, |
|
"eval_samples_per_second": 32.19, |
|
"eval_steps_per_second": 4.029, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 23.670212765957448, |
|
"eval_cer": 0.3032852685797104, |
|
"eval_loss": 0.8724159598350525, |
|
"eval_runtime": 152.578, |
|
"eval_samples_per_second": 32.515, |
|
"eval_steps_per_second": 4.07, |
|
"eval_wer": 1.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"grad_norm": 3.3926920890808105, |
|
"learning_rate": 0.000215856, |
|
"loss": 0.424, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.93617021276596, |
|
"eval_cer": 0.30421387441257003, |
|
"eval_loss": 0.8894767761230469, |
|
"eval_runtime": 168.9525, |
|
"eval_samples_per_second": 29.363, |
|
"eval_steps_per_second": 3.676, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.20212765957447, |
|
"eval_cer": 0.3013890033976559, |
|
"eval_loss": 0.8863051533699036, |
|
"eval_runtime": 151.1238, |
|
"eval_samples_per_second": 32.827, |
|
"eval_steps_per_second": 4.109, |
|
"eval_wer": 1.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 24.46808510638298, |
|
"eval_cer": 0.3112391680733165, |
|
"eval_loss": 0.9255392551422119, |
|
"eval_runtime": 151.1678, |
|
"eval_samples_per_second": 32.818, |
|
"eval_steps_per_second": 4.108, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 24.73404255319149, |
|
"eval_cer": 0.3010635574281958, |
|
"eval_loss": 0.9398043155670166, |
|
"eval_runtime": 151.9435, |
|
"eval_samples_per_second": 32.65, |
|
"eval_steps_per_second": 4.087, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.30710383462137614, |
|
"eval_loss": 0.8763216137886047, |
|
"eval_runtime": 157.5659, |
|
"eval_samples_per_second": 31.485, |
|
"eval_steps_per_second": 3.941, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"grad_norm": 9.72305679321289, |
|
"learning_rate": 0.00022785599999999997, |
|
"loss": 0.4122, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.26595744680851, |
|
"eval_cer": 0.3090174569218018, |
|
"eval_loss": 0.9353150129318237, |
|
"eval_runtime": 152.5497, |
|
"eval_samples_per_second": 32.521, |
|
"eval_steps_per_second": 4.071, |
|
"eval_wer": 1.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.53191489361702, |
|
"eval_cer": 0.31283602296346763, |
|
"eval_loss": 0.9381543397903442, |
|
"eval_runtime": 171.9179, |
|
"eval_samples_per_second": 28.857, |
|
"eval_steps_per_second": 3.612, |
|
"eval_wer": 1.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 25.79787234042553, |
|
"eval_cer": 0.3101673660138944, |
|
"eval_loss": 0.9294571876525879, |
|
"eval_runtime": 151.6573, |
|
"eval_samples_per_second": 32.712, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 26.06382978723404, |
|
"eval_cer": 0.3091953673851067, |
|
"eval_loss": 0.9286412596702576, |
|
"eval_runtime": 151.2277, |
|
"eval_samples_per_second": 32.805, |
|
"eval_steps_per_second": 4.106, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 26.329787234042552, |
|
"eval_cer": 0.3013759855588775, |
|
"eval_loss": 0.9140869379043579, |
|
"eval_runtime": 151.0836, |
|
"eval_samples_per_second": 32.836, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"grad_norm": 4.485568046569824, |
|
"learning_rate": 0.000239856, |
|
"loss": 0.4146, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.595744680851062, |
|
"eval_cer": 0.3125756661878995, |
|
"eval_loss": 0.942582905292511, |
|
"eval_runtime": 151.6657, |
|
"eval_samples_per_second": 32.71, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 1.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.861702127659573, |
|
"eval_cer": 0.3032115008266328, |
|
"eval_loss": 0.8652423620223999, |
|
"eval_runtime": 150.762, |
|
"eval_samples_per_second": 32.906, |
|
"eval_steps_per_second": 4.119, |
|
"eval_wer": 1.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 27.127659574468087, |
|
"eval_cer": 0.31053186549968975, |
|
"eval_loss": 0.9288522601127625, |
|
"eval_runtime": 151.4303, |
|
"eval_samples_per_second": 32.761, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 1.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 27.393617021276597, |
|
"eval_cer": 0.31025849088534324, |
|
"eval_loss": 0.9458552002906799, |
|
"eval_runtime": 151.6351, |
|
"eval_samples_per_second": 32.717, |
|
"eval_steps_per_second": 4.095, |
|
"eval_wer": 1.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 27.659574468085108, |
|
"eval_cer": 0.3123847378858162, |
|
"eval_loss": 0.9137027859687805, |
|
"eval_runtime": 151.0095, |
|
"eval_samples_per_second": 32.852, |
|
"eval_steps_per_second": 4.112, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"grad_norm": 9.41711711883545, |
|
"learning_rate": 0.000251856, |
|
"loss": 0.416, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.925531914893618, |
|
"eval_cer": 0.3099504020342543, |
|
"eval_loss": 0.9305452704429626, |
|
"eval_runtime": 151.4404, |
|
"eval_samples_per_second": 32.759, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 1.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 28.19148936170213, |
|
"eval_cer": 0.30706912038463374, |
|
"eval_loss": 0.9589177370071411, |
|
"eval_runtime": 152.6851, |
|
"eval_samples_per_second": 32.492, |
|
"eval_steps_per_second": 4.067, |
|
"eval_wer": 1.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 28.45744680851064, |
|
"eval_cer": 0.30605372895991806, |
|
"eval_loss": 0.92755526304245, |
|
"eval_runtime": 151.1956, |
|
"eval_samples_per_second": 32.812, |
|
"eval_steps_per_second": 4.107, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 28.72340425531915, |
|
"eval_cer": 0.3078067979154101, |
|
"eval_loss": 0.9259202480316162, |
|
"eval_runtime": 151.1107, |
|
"eval_samples_per_second": 32.83, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 28.98936170212766, |
|
"eval_cer": 0.3149796270823118, |
|
"eval_loss": 0.9287381768226624, |
|
"eval_runtime": 151.0264, |
|
"eval_samples_per_second": 32.849, |
|
"eval_steps_per_second": 4.112, |
|
"eval_wer": 1.0, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"grad_norm": 6.876596927642822, |
|
"learning_rate": 0.000263832, |
|
"loss": 0.4078, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.25531914893617, |
|
"eval_cer": 0.3165591248540917, |
|
"eval_loss": 0.9346364736557007, |
|
"eval_runtime": 151.4411, |
|
"eval_samples_per_second": 32.759, |
|
"eval_steps_per_second": 4.101, |
|
"eval_wer": 0.9997984277363435, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.52127659574468, |
|
"eval_cer": 0.31648969638060687, |
|
"eval_loss": 0.9558972120285034, |
|
"eval_runtime": 151.7115, |
|
"eval_samples_per_second": 32.7, |
|
"eval_steps_per_second": 4.093, |
|
"eval_wer": 1.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 29.78723404255319, |
|
"eval_cer": 0.31732717734201765, |
|
"eval_loss": 0.9669720530509949, |
|
"eval_runtime": 151.1358, |
|
"eval_samples_per_second": 32.825, |
|
"eval_steps_per_second": 4.109, |
|
"eval_wer": 1.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 11280, |
|
"total_flos": 1.1567283350118728e+19, |
|
"train_loss": 1.009270377192937, |
|
"train_runtime": 42299.7955, |
|
"train_samples_per_second": 8.533, |
|
"train_steps_per_second": 0.267 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1567283350118728e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|