{ "best_metric": 0.6672275066375732, "best_model_checkpoint": "./Hubert-common_voice-ja-demo-phonemes-cosine-3e-5/checkpoint-6100", "epoch": 20.0, "eval_steps": 100, "global_step": 7520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26595744680851063, "eval_cer": 4.906748881550685, "eval_loss": Infinity, "eval_runtime": 231.9839, "eval_samples_per_second": 21.385, "eval_steps_per_second": 2.677, "eval_wer": 1.8203991130820398, "step": 100 }, { "epoch": 0.5319148936170213, "eval_cer": 4.632337179381479, "eval_loss": Infinity, "eval_runtime": 212.1645, "eval_samples_per_second": 23.383, "eval_steps_per_second": 2.927, "eval_wer": 1.5926224551501713, "step": 200 }, { "epoch": 0.7978723404255319, "eval_cer": 1.9636802298082472, "eval_loss": Infinity, "eval_runtime": 212.1688, "eval_samples_per_second": 23.382, "eval_steps_per_second": 2.927, "eval_wer": 1.1769804474904253, "step": 300 }, { "epoch": 1.0638297872340425, "eval_cer": 0.9816665437204115, "eval_loss": Infinity, "eval_runtime": 208.9606, "eval_samples_per_second": 23.741, "eval_steps_per_second": 2.972, "eval_wer": 1.0, "step": 400 }, { "epoch": 1.3297872340425532, "grad_norm": 50.47308349609375, "learning_rate": 1.1904e-06, "loss": 14.493, "step": 500 }, { "epoch": 1.3297872340425532, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 209.2095, "eval_samples_per_second": 23.713, "eval_steps_per_second": 2.968, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.5957446808510638, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 208.2442, "eval_samples_per_second": 23.823, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 600 }, { "epoch": 1.8617021276595744, "eval_cer": 0.9816622044408188, "eval_loss": Infinity, "eval_runtime": 208.3548, "eval_samples_per_second": 23.81, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 700 }, { "epoch": 2.127659574468085, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 207.0664, "eval_samples_per_second": 23.958, "eval_steps_per_second": 2.999, "eval_wer": 1.0, "step": 800 }, { "epoch": 2.393617021276596, "eval_cer": 0.9816795615591899, "eval_loss": Infinity, "eval_runtime": 207.7036, "eval_samples_per_second": 23.885, "eval_steps_per_second": 2.99, "eval_wer": 1.0, "step": 900 }, { "epoch": 2.6595744680851063, "grad_norm": 46.10651397705078, "learning_rate": 2.3904e-06, "loss": 6.5744, "step": 1000 }, { "epoch": 2.6595744680851063, "eval_cer": 0.9816839008387828, "eval_loss": 6.808023452758789, "eval_runtime": 220.5662, "eval_samples_per_second": 22.492, "eval_steps_per_second": 2.815, "eval_wer": 1.0, "step": 1000 }, { "epoch": 2.925531914893617, "eval_cer": 0.9816795615591899, "eval_loss": 6.59719705581665, "eval_runtime": 208.0758, "eval_samples_per_second": 23.842, "eval_steps_per_second": 2.984, "eval_wer": 1.0, "step": 1100 }, { "epoch": 3.1914893617021276, "eval_cer": 0.9816795615591899, "eval_loss": Infinity, "eval_runtime": 207.6614, "eval_samples_per_second": 23.89, "eval_steps_per_second": 2.99, "eval_wer": 1.0, "step": 1200 }, { "epoch": 3.4574468085106385, "eval_cer": 0.9816882401183755, "eval_loss": Infinity, "eval_runtime": 209.3327, "eval_samples_per_second": 23.699, "eval_steps_per_second": 2.967, "eval_wer": 1.0, "step": 1300 }, { "epoch": 3.723404255319149, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 208.2723, "eval_samples_per_second": 23.82, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 1400 }, { "epoch": 3.9893617021276597, "grad_norm": 31.730201721191406, "learning_rate": 3.5904e-06, "loss": 5.5193, "step": 1500 }, { "epoch": 3.9893617021276597, "eval_cer": 0.9816795615591899, "eval_loss": Infinity, "eval_runtime": 208.1441, "eval_samples_per_second": 23.834, "eval_steps_per_second": 2.984, "eval_wer": 1.0, "step": 1500 }, { "epoch": 4.25531914893617, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 211.1492, "eval_samples_per_second": 23.495, "eval_steps_per_second": 2.941, "eval_wer": 1.0, "step": 1600 }, { "epoch": 4.5212765957446805, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 208.0326, "eval_samples_per_second": 23.847, "eval_steps_per_second": 2.985, "eval_wer": 1.0, "step": 1700 }, { "epoch": 4.787234042553192, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 212.7439, "eval_samples_per_second": 23.319, "eval_steps_per_second": 2.919, "eval_wer": 1.0, "step": 1800 }, { "epoch": 5.053191489361702, "eval_cer": 0.9816622044408188, "eval_loss": Infinity, "eval_runtime": 211.9236, "eval_samples_per_second": 23.409, "eval_steps_per_second": 2.93, "eval_wer": 1.0, "step": 1900 }, { "epoch": 5.319148936170213, "grad_norm": 14.480951309204102, "learning_rate": 4.7904e-06, "loss": 4.5578, "step": 2000 }, { "epoch": 5.319148936170213, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 225.0981, "eval_samples_per_second": 22.039, "eval_steps_per_second": 2.759, "eval_wer": 1.0, "step": 2000 }, { "epoch": 5.585106382978723, "eval_cer": 0.9816622044408188, "eval_loss": Infinity, "eval_runtime": 208.2028, "eval_samples_per_second": 23.828, "eval_steps_per_second": 2.983, "eval_wer": 1.0, "step": 2100 }, { "epoch": 5.851063829787234, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 208.7044, "eval_samples_per_second": 23.77, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 2200 }, { "epoch": 6.117021276595745, "eval_cer": 0.9816665437204115, "eval_loss": Infinity, "eval_runtime": 208.3105, "eval_samples_per_second": 23.815, "eval_steps_per_second": 2.981, "eval_wer": 1.0, "step": 2300 }, { "epoch": 6.382978723404255, "eval_cer": 0.9816839008387828, "eval_loss": Infinity, "eval_runtime": 208.6388, "eval_samples_per_second": 23.778, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 2400 }, { "epoch": 6.648936170212766, "grad_norm": 10.711950302124023, "learning_rate": 5.9904e-06, "loss": 3.6943, "step": 2500 }, { "epoch": 6.648936170212766, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.8055, "eval_samples_per_second": 23.646, "eval_steps_per_second": 2.96, "eval_wer": 1.0, "step": 2500 }, { "epoch": 6.914893617021277, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.752, "eval_samples_per_second": 23.652, "eval_steps_per_second": 2.961, "eval_wer": 1.0, "step": 2600 }, { "epoch": 7.180851063829787, "eval_cer": 0.9816839008387828, "eval_loss": Infinity, "eval_runtime": 209.5647, "eval_samples_per_second": 23.673, "eval_steps_per_second": 2.963, "eval_wer": 1.0, "step": 2700 }, { "epoch": 7.446808510638298, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 209.0972, "eval_samples_per_second": 23.726, "eval_steps_per_second": 2.97, "eval_wer": 1.0, "step": 2800 }, { "epoch": 7.712765957446808, "eval_cer": 0.9816665437204115, "eval_loss": 3.1572225093841553, "eval_runtime": 214.7616, "eval_samples_per_second": 23.1, "eval_steps_per_second": 2.892, "eval_wer": 1.0, "step": 2900 }, { "epoch": 7.9787234042553195, "grad_norm": 3.429447650909424, "learning_rate": 7.190400000000001e-06, "loss": 3.1932, "step": 3000 }, { "epoch": 7.9787234042553195, "eval_cer": 0.9816795615591899, "eval_loss": Infinity, "eval_runtime": 209.8411, "eval_samples_per_second": 23.642, "eval_steps_per_second": 2.959, "eval_wer": 1.0, "step": 3000 }, { "epoch": 8.24468085106383, "eval_cer": 0.9816795615591899, "eval_loss": Infinity, "eval_runtime": 210.153, "eval_samples_per_second": 23.607, "eval_steps_per_second": 2.955, "eval_wer": 1.0, "step": 3100 }, { "epoch": 8.51063829787234, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.9276, "eval_samples_per_second": 23.632, "eval_steps_per_second": 2.958, "eval_wer": 1.0, "step": 3200 }, { "epoch": 8.77659574468085, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.788, "eval_samples_per_second": 23.648, "eval_steps_per_second": 2.96, "eval_wer": 1.0, "step": 3300 }, { "epoch": 9.042553191489361, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 209.3829, "eval_samples_per_second": 23.693, "eval_steps_per_second": 2.966, "eval_wer": 1.0, "step": 3400 }, { "epoch": 9.308510638297872, "grad_norm": 0.5232295393943787, "learning_rate": 8.3904e-06, "loss": 3.0309, "step": 3500 }, { "epoch": 9.308510638297872, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 209.6347, "eval_samples_per_second": 23.665, "eval_steps_per_second": 2.962, "eval_wer": 1.0, "step": 3500 }, { "epoch": 9.574468085106384, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.0039, "eval_samples_per_second": 23.736, "eval_steps_per_second": 2.971, "eval_wer": 1.0, "step": 3600 }, { "epoch": 9.840425531914894, "eval_cer": 0.9816622044408188, "eval_loss": Infinity, "eval_runtime": 209.5465, "eval_samples_per_second": 23.675, "eval_steps_per_second": 2.964, "eval_wer": 1.0, "step": 3700 }, { "epoch": 10.106382978723405, "eval_cer": 0.9816752222795971, "eval_loss": Infinity, "eval_runtime": 209.1604, "eval_samples_per_second": 23.719, "eval_steps_per_second": 2.969, "eval_wer": 1.0, "step": 3800 }, { "epoch": 10.372340425531915, "eval_cer": 0.9816665437204115, "eval_loss": Infinity, "eval_runtime": 209.698, "eval_samples_per_second": 23.658, "eval_steps_per_second": 2.961, "eval_wer": 1.0, "step": 3900 }, { "epoch": 10.638297872340425, "grad_norm": 1.3846327066421509, "learning_rate": 9.5904e-06, "loss": 2.9704, "step": 4000 }, { "epoch": 10.638297872340425, "eval_cer": 0.9816839008387828, "eval_loss": Infinity, "eval_runtime": 210.1316, "eval_samples_per_second": 23.609, "eval_steps_per_second": 2.955, "eval_wer": 1.0, "step": 4000 }, { "epoch": 10.904255319148936, "eval_cer": 0.9816708830000044, "eval_loss": Infinity, "eval_runtime": 209.6004, "eval_samples_per_second": 23.669, "eval_steps_per_second": 2.963, "eval_wer": 1.0, "step": 4100 }, { "epoch": 11.170212765957446, "eval_cer": 0.9048569556482233, "eval_loss": Infinity, "eval_runtime": 209.9962, "eval_samples_per_second": 23.624, "eval_steps_per_second": 2.957, "eval_wer": 1.0, "step": 4200 }, { "epoch": 11.436170212765958, "eval_cer": 0.7253886909695252, "eval_loss": Infinity, "eval_runtime": 209.5146, "eval_samples_per_second": 23.679, "eval_steps_per_second": 2.964, "eval_wer": 1.0, "step": 4300 }, { "epoch": 11.702127659574469, "eval_cer": 0.4365402055950671, "eval_loss": Infinity, "eval_runtime": 210.2066, "eval_samples_per_second": 23.601, "eval_steps_per_second": 2.954, "eval_wer": 1.0, "step": 4400 }, { "epoch": 11.96808510638298, "grad_norm": 3.278918981552124, "learning_rate": 1.0790400000000001e-05, "loss": 2.2767, "step": 4500 }, { "epoch": 11.96808510638298, "eval_cer": 0.3732301163360859, "eval_loss": 1.5674688816070557, "eval_runtime": 211.1485, "eval_samples_per_second": 23.495, "eval_steps_per_second": 2.941, "eval_wer": 1.0, "step": 4500 }, { "epoch": 12.23404255319149, "eval_cer": 0.3454630662217459, "eval_loss": Infinity, "eval_runtime": 211.2681, "eval_samples_per_second": 23.482, "eval_steps_per_second": 2.939, "eval_wer": 1.0, "step": 4600 }, { "epoch": 12.5, "eval_cer": 0.3277154126871857, "eval_loss": Infinity, "eval_runtime": 210.9727, "eval_samples_per_second": 23.515, "eval_steps_per_second": 2.944, "eval_wer": 1.0, "step": 4700 }, { "epoch": 12.76595744680851, "eval_cer": 0.3052553015148425, "eval_loss": Infinity, "eval_runtime": 210.721, "eval_samples_per_second": 23.543, "eval_steps_per_second": 2.947, "eval_wer": 1.0, "step": 4800 }, { "epoch": 13.03191489361702, "eval_cer": 0.2935305680550915, "eval_loss": Infinity, "eval_runtime": 210.659, "eval_samples_per_second": 23.55, "eval_steps_per_second": 2.948, "eval_wer": 1.0, "step": 4900 }, { "epoch": 13.297872340425531, "grad_norm": 2.6736652851104736, "learning_rate": 1.19904e-05, "loss": 1.2873, "step": 5000 }, { "epoch": 13.297872340425531, "eval_cer": 0.2783691251578413, "eval_loss": Infinity, "eval_runtime": 210.6907, "eval_samples_per_second": 23.546, "eval_steps_per_second": 2.947, "eval_wer": 1.0, "step": 5000 }, { "epoch": 13.563829787234042, "eval_cer": 0.2684104784923607, "eval_loss": Infinity, "eval_runtime": 211.3579, "eval_samples_per_second": 23.472, "eval_steps_per_second": 2.938, "eval_wer": 1.0, "step": 5100 }, { "epoch": 13.829787234042554, "eval_cer": 0.26778996151059004, "eval_loss": Infinity, "eval_runtime": 210.8829, "eval_samples_per_second": 23.525, "eval_steps_per_second": 2.945, "eval_wer": 1.0, "step": 5200 }, { "epoch": 14.095744680851064, "eval_cer": 0.26162818448881114, "eval_loss": Infinity, "eval_runtime": 210.4536, "eval_samples_per_second": 23.573, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 5300 }, { "epoch": 14.361702127659575, "eval_cer": 0.2608384356029212, "eval_loss": 0.8213518261909485, "eval_runtime": 210.8924, "eval_samples_per_second": 23.524, "eval_steps_per_second": 2.945, "eval_wer": 1.0, "step": 5400 }, { "epoch": 14.627659574468085, "grad_norm": 4.707749843597412, "learning_rate": 1.31904e-05, "loss": 0.9318, "step": 5500 }, { "epoch": 14.627659574468085, "eval_cer": 0.2563863347407063, "eval_loss": Infinity, "eval_runtime": 210.577, "eval_samples_per_second": 23.559, "eval_steps_per_second": 2.949, "eval_wer": 1.0, "step": 5500 }, { "epoch": 14.893617021276595, "eval_cer": 0.25441630180557423, "eval_loss": Infinity, "eval_runtime": 210.7551, "eval_samples_per_second": 23.539, "eval_steps_per_second": 2.947, "eval_wer": 1.0, "step": 5600 }, { "epoch": 15.159574468085106, "eval_cer": 0.25251135806433417, "eval_loss": Infinity, "eval_runtime": 210.4666, "eval_samples_per_second": 23.571, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 5700 }, { "epoch": 15.425531914893616, "eval_cer": 0.25104034228237426, "eval_loss": Infinity, "eval_runtime": 210.6399, "eval_samples_per_second": 23.552, "eval_steps_per_second": 2.948, "eval_wer": 1.0, "step": 5800 }, { "epoch": 15.691489361702128, "eval_cer": 0.25268058996845344, "eval_loss": Infinity, "eval_runtime": 210.4046, "eval_samples_per_second": 23.578, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 5900 }, { "epoch": 15.957446808510639, "grad_norm": 3.4887096881866455, "learning_rate": 1.43904e-05, "loss": 0.754, "step": 6000 }, { "epoch": 15.957446808510639, "eval_cer": 0.24993382598620978, "eval_loss": Infinity, "eval_runtime": 210.2447, "eval_samples_per_second": 23.596, "eval_steps_per_second": 2.954, "eval_wer": 1.0, "step": 6000 }, { "epoch": 16.22340425531915, "eval_cer": 0.24853657795732753, "eval_loss": 0.6672275066375732, "eval_runtime": 210.4958, "eval_samples_per_second": 23.568, "eval_steps_per_second": 2.95, "eval_wer": 1.0, "step": 6100 }, { "epoch": 16.48936170212766, "eval_cer": 0.24641033095685455, "eval_loss": Infinity, "eval_runtime": 210.6343, "eval_samples_per_second": 23.553, "eval_steps_per_second": 2.948, "eval_wer": 1.0, "step": 6200 }, { "epoch": 16.75531914893617, "eval_cer": 0.24667936629160828, "eval_loss": Infinity, "eval_runtime": 210.3464, "eval_samples_per_second": 23.585, "eval_steps_per_second": 2.952, "eval_wer": 1.0, "step": 6300 }, { "epoch": 17.02127659574468, "eval_cer": 0.24105999921892968, "eval_loss": Infinity, "eval_runtime": 210.367, "eval_samples_per_second": 23.583, "eval_steps_per_second": 2.952, "eval_wer": 1.0, "step": 6400 }, { "epoch": 17.28723404255319, "grad_norm": 3.309645652770996, "learning_rate": 1.5590400000000002e-05, "loss": 0.6421, "step": 6500 }, { "epoch": 17.28723404255319, "eval_cer": 0.2411424455311929, "eval_loss": Infinity, "eval_runtime": 210.1359, "eval_samples_per_second": 23.609, "eval_steps_per_second": 2.955, "eval_wer": 1.0, "step": 6500 }, { "epoch": 17.5531914893617, "eval_cer": 0.2417846589109276, "eval_loss": Infinity, "eval_runtime": 210.0208, "eval_samples_per_second": 23.621, "eval_steps_per_second": 2.957, "eval_wer": 1.0, "step": 6600 }, { "epoch": 17.819148936170212, "eval_cer": 0.23863868120614615, "eval_loss": Infinity, "eval_runtime": 210.3543, "eval_samples_per_second": 23.584, "eval_steps_per_second": 2.952, "eval_wer": 1.0, "step": 6700 }, { "epoch": 18.085106382978722, "eval_cer": 0.23867773472248138, "eval_loss": Infinity, "eval_runtime": 210.344, "eval_samples_per_second": 23.585, "eval_steps_per_second": 2.952, "eval_wer": 0.999596855472687, "step": 6800 }, { "epoch": 18.351063829787233, "eval_cer": 0.23813532477338112, "eval_loss": Infinity, "eval_runtime": 210.5017, "eval_samples_per_second": 23.568, "eval_steps_per_second": 2.95, "eval_wer": 1.0, "step": 6900 }, { "epoch": 18.617021276595743, "grad_norm": 3.4141018390655518, "learning_rate": 1.6790399999999998e-05, "loss": 0.568, "step": 7000 }, { "epoch": 18.617021276595743, "eval_cer": 0.23912468052053998, "eval_loss": Infinity, "eval_runtime": 210.039, "eval_samples_per_second": 23.619, "eval_steps_per_second": 2.957, "eval_wer": 1.0, "step": 7000 }, { "epoch": 18.882978723404257, "eval_cer": 0.236976737122103, "eval_loss": Infinity, "eval_runtime": 210.1426, "eval_samples_per_second": 23.608, "eval_steps_per_second": 2.955, "eval_wer": 1.0, "step": 7100 }, { "epoch": 19.148936170212767, "eval_cer": 0.2344165621623498, "eval_loss": Infinity, "eval_runtime": 210.1255, "eval_samples_per_second": 23.61, "eval_steps_per_second": 2.955, "eval_wer": 1.0, "step": 7200 }, { "epoch": 19.414893617021278, "eval_cer": 0.2363562201403323, "eval_loss": Infinity, "eval_runtime": 211.0028, "eval_samples_per_second": 23.512, "eval_steps_per_second": 2.943, "eval_wer": 1.0, "step": 7300 }, { "epoch": 19.680851063829788, "eval_cer": 0.23471163317466034, "eval_loss": Infinity, "eval_runtime": 210.5331, "eval_samples_per_second": 23.564, "eval_steps_per_second": 2.95, "eval_wer": 1.0, "step": 7400 }, { "epoch": 19.9468085106383, "grad_norm": 4.509326934814453, "learning_rate": 1.79904e-05, "loss": 0.5259, "step": 7500 }, { "epoch": 19.9468085106383, "eval_cer": 0.23339249217844854, "eval_loss": Infinity, "eval_runtime": 210.6718, "eval_samples_per_second": 23.548, "eval_steps_per_second": 2.948, "eval_wer": 1.0, "step": 7500 }, { "epoch": 20.0, "step": 7520, "total_flos": 1.051128494332674e+19, "train_loss": 3.393639066751967, "train_runtime": 40261.649, "train_samples_per_second": 5.977, "train_steps_per_second": 0.187 } ], "logging_steps": 500, "max_steps": 7520, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.051128494332674e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }