{ "best_metric": 2.9668984413146973, "best_model_checkpoint": "./Hubert-common_voice-phoneme-debug-warmup500/checkpoint-3600", "epoch": 30.0, "eval_steps": 100, "global_step": 4230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7092198581560284, "eval_cer": 0.9850705754614549, "eval_loss": 4.566883563995361, "eval_runtime": 21.222, "eval_samples_per_second": 23.56, "eval_steps_per_second": 2.969, "eval_wer": 1.0, "step": 100 }, { "epoch": 1.4184397163120568, "eval_cer": 0.9851007359150682, "eval_loss": 3.011932134628296, "eval_runtime": 21.0835, "eval_samples_per_second": 23.715, "eval_steps_per_second": 2.988, "eval_wer": 1.0, "step": 200 }, { "epoch": 2.127659574468085, "eval_cer": 0.9850705754614549, "eval_loss": 2.9839680194854736, "eval_runtime": 20.7833, "eval_samples_per_second": 24.058, "eval_steps_per_second": 3.031, "eval_wer": 1.0, "step": 300 }, { "epoch": 2.8368794326241136, "eval_cer": 0.9851007359150682, "eval_loss": 2.9763612747192383, "eval_runtime": 20.9118, "eval_samples_per_second": 23.91, "eval_steps_per_second": 3.013, "eval_wer": 1.0, "step": 400 }, { "epoch": 3.546099290780142, "grad_norm": 0.8111785054206848, "learning_rate": 0.0002982, "loss": 3.973, "step": 500 }, { "epoch": 3.546099290780142, "eval_cer": 0.9851007359150682, "eval_loss": 2.9795632362365723, "eval_runtime": 20.5236, "eval_samples_per_second": 24.362, "eval_steps_per_second": 3.07, "eval_wer": 1.0, "step": 500 }, { "epoch": 4.25531914893617, "eval_cer": 0.9851007359150682, "eval_loss": 2.9758143424987793, "eval_runtime": 21.1254, "eval_samples_per_second": 23.668, "eval_steps_per_second": 2.982, "eval_wer": 1.0, "step": 600 }, { "epoch": 4.964539007092198, "eval_cer": 0.9851007359150682, "eval_loss": 2.9691450595855713, "eval_runtime": 20.6886, "eval_samples_per_second": 24.168, "eval_steps_per_second": 3.045, "eval_wer": 1.0, "step": 700 }, { "epoch": 5.673758865248227, "eval_cer": 0.9850404150078417, "eval_loss": 2.985788345336914, "eval_runtime": 20.5834, "eval_samples_per_second": 24.291, "eval_steps_per_second": 3.061, "eval_wer": 1.0, "step": 800 }, { "epoch": 6.382978723404255, "eval_cer": 0.9850705754614549, "eval_loss": 2.9692296981811523, "eval_runtime": 21.3068, "eval_samples_per_second": 23.467, "eval_steps_per_second": 2.957, "eval_wer": 1.0, "step": 900 }, { "epoch": 7.092198581560283, "grad_norm": 0.9110927581787109, "learning_rate": 0.00028704894194342103, "loss": 2.9654, "step": 1000 }, { "epoch": 7.092198581560283, "eval_cer": 0.9850404150078417, "eval_loss": 2.9895429611206055, "eval_runtime": 21.477, "eval_samples_per_second": 23.281, "eval_steps_per_second": 2.933, "eval_wer": 1.0, "step": 1000 }, { "epoch": 7.801418439716312, "eval_cer": 0.9850102545542285, "eval_loss": 2.972479820251465, "eval_runtime": 20.7781, "eval_samples_per_second": 24.064, "eval_steps_per_second": 3.032, "eval_wer": 1.0, "step": 1100 }, { "epoch": 8.51063829787234, "eval_cer": 0.9850404150078417, "eval_loss": 2.971254825592041, "eval_runtime": 21.378, "eval_samples_per_second": 23.389, "eval_steps_per_second": 2.947, "eval_wer": 1.0, "step": 1200 }, { "epoch": 9.21985815602837, "eval_cer": 0.9851007359150682, "eval_loss": 2.9757533073425293, "eval_runtime": 21.4867, "eval_samples_per_second": 23.27, "eval_steps_per_second": 2.932, "eval_wer": 1.0, "step": 1300 }, { "epoch": 9.929078014184396, "eval_cer": 0.9850404150078417, "eval_loss": 2.978407859802246, "eval_runtime": 21.1721, "eval_samples_per_second": 23.616, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 1400 }, { "epoch": 10.638297872340425, "grad_norm": 0.4841027557849884, "learning_rate": 0.00025015032745484046, "loss": 2.9643, "step": 1500 }, { "epoch": 10.638297872340425, "eval_cer": 0.9851007359150682, "eval_loss": 2.968749761581421, "eval_runtime": 21.3576, "eval_samples_per_second": 23.411, "eval_steps_per_second": 2.95, "eval_wer": 1.0, "step": 1500 }, { "epoch": 11.347517730496454, "eval_cer": 0.9850705754614549, "eval_loss": 2.977853536605835, "eval_runtime": 21.1411, "eval_samples_per_second": 23.651, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 1600 }, { "epoch": 12.056737588652481, "eval_cer": 0.9850404150078417, "eval_loss": 2.9678869247436523, "eval_runtime": 21.0582, "eval_samples_per_second": 23.744, "eval_steps_per_second": 2.992, "eval_wer": 1.0, "step": 1700 }, { "epoch": 12.76595744680851, "eval_cer": 0.9850705754614549, "eval_loss": 2.976925849914551, "eval_runtime": 21.1415, "eval_samples_per_second": 23.65, "eval_steps_per_second": 2.98, "eval_wer": 1.0, "step": 1800 }, { "epoch": 13.47517730496454, "eval_cer": 0.9850705754614549, "eval_loss": 2.9718081951141357, "eval_runtime": 20.8074, "eval_samples_per_second": 24.03, "eval_steps_per_second": 3.028, "eval_wer": 1.0, "step": 1900 }, { "epoch": 14.184397163120567, "grad_norm": 0.6759688258171082, "learning_rate": 0.00019575136936746506, "loss": 2.9631, "step": 2000 }, { "epoch": 14.184397163120567, "eval_cer": 0.9851007359150682, "eval_loss": 2.968552827835083, "eval_runtime": 21.3479, "eval_samples_per_second": 23.421, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 2000 }, { "epoch": 14.893617021276595, "eval_cer": 0.9850404150078417, "eval_loss": 2.9706172943115234, "eval_runtime": 21.7626, "eval_samples_per_second": 22.975, "eval_steps_per_second": 2.895, "eval_wer": 1.0, "step": 2100 }, { "epoch": 15.602836879432624, "eval_cer": 0.9851007359150682, "eval_loss": 2.9790878295898438, "eval_runtime": 20.9137, "eval_samples_per_second": 23.908, "eval_steps_per_second": 3.012, "eval_wer": 1.0, "step": 2200 }, { "epoch": 16.31205673758865, "eval_cer": 0.9850705754614549, "eval_loss": 2.97310733795166, "eval_runtime": 21.1636, "eval_samples_per_second": 23.625, "eval_steps_per_second": 2.977, "eval_wer": 1.0, "step": 2300 }, { "epoch": 17.02127659574468, "eval_cer": 0.9850404150078417, "eval_loss": 2.97218656539917, "eval_runtime": 20.5792, "eval_samples_per_second": 24.296, "eval_steps_per_second": 3.061, "eval_wer": 1.0, "step": 2400 }, { "epoch": 17.73049645390071, "grad_norm": 0.45569872856140137, "learning_rate": 0.00013335778256150607, "loss": 2.9627, "step": 2500 }, { "epoch": 17.73049645390071, "eval_cer": 0.9850705754614549, "eval_loss": 2.9722900390625, "eval_runtime": 21.0187, "eval_samples_per_second": 23.788, "eval_steps_per_second": 2.997, "eval_wer": 1.0, "step": 2500 }, { "epoch": 18.43971631205674, "eval_cer": 0.9850705754614549, "eval_loss": 2.9688594341278076, "eval_runtime": 20.8051, "eval_samples_per_second": 24.033, "eval_steps_per_second": 3.028, "eval_wer": 1.0, "step": 2600 }, { "epoch": 19.148936170212767, "eval_cer": 0.9850705754614549, "eval_loss": 2.974661111831665, "eval_runtime": 20.7915, "eval_samples_per_second": 24.048, "eval_steps_per_second": 3.03, "eval_wer": 1.0, "step": 2700 }, { "epoch": 19.858156028368793, "eval_cer": 0.9850705754614549, "eval_loss": 2.980069160461426, "eval_runtime": 21.3631, "eval_samples_per_second": 23.405, "eval_steps_per_second": 2.949, "eval_wer": 1.0, "step": 2800 }, { "epoch": 20.56737588652482, "eval_cer": 0.9850705754614549, "eval_loss": 2.9740312099456787, "eval_runtime": 21.1673, "eval_samples_per_second": 23.621, "eval_steps_per_second": 2.976, "eval_wer": 1.0, "step": 2900 }, { "epoch": 21.27659574468085, "grad_norm": 1.4903497695922852, "learning_rate": 7.387226935671251e-05, "loss": 2.9622, "step": 3000 }, { "epoch": 21.27659574468085, "eval_cer": 0.9850102545542285, "eval_loss": 2.973616600036621, "eval_runtime": 21.0369, "eval_samples_per_second": 23.768, "eval_steps_per_second": 2.995, "eval_wer": 1.0, "step": 3000 }, { "epoch": 21.98581560283688, "eval_cer": 0.9850705754614549, "eval_loss": 2.9718637466430664, "eval_runtime": 21.543, "eval_samples_per_second": 23.209, "eval_steps_per_second": 2.924, "eval_wer": 1.0, "step": 3100 }, { "epoch": 22.69503546099291, "eval_cer": 0.9850404150078417, "eval_loss": 2.9710469245910645, "eval_runtime": 21.5903, "eval_samples_per_second": 23.159, "eval_steps_per_second": 2.918, "eval_wer": 1.0, "step": 3200 }, { "epoch": 23.404255319148938, "eval_cer": 0.9850102545542285, "eval_loss": 2.971414804458618, "eval_runtime": 21.0287, "eval_samples_per_second": 23.777, "eval_steps_per_second": 2.996, "eval_wer": 1.0, "step": 3300 }, { "epoch": 24.113475177304963, "eval_cer": 0.9851007359150682, "eval_loss": 2.9701104164123535, "eval_runtime": 22.2142, "eval_samples_per_second": 22.508, "eval_steps_per_second": 2.836, "eval_wer": 1.0, "step": 3400 }, { "epoch": 24.822695035460992, "grad_norm": 0.7893990874290466, "learning_rate": 2.768937310296969e-05, "loss": 2.9609, "step": 3500 }, { "epoch": 24.822695035460992, "eval_cer": 0.9850705754614549, "eval_loss": 2.9694862365722656, "eval_runtime": 20.7615, "eval_samples_per_second": 24.083, "eval_steps_per_second": 3.034, "eval_wer": 1.0, "step": 3500 }, { "epoch": 25.53191489361702, "eval_cer": 0.9850404150078417, "eval_loss": 2.9668984413146973, "eval_runtime": 21.25, "eval_samples_per_second": 23.529, "eval_steps_per_second": 2.965, "eval_wer": 1.0, "step": 3600 }, { "epoch": 26.24113475177305, "eval_cer": 0.9851308963686813, "eval_loss": 2.9773755073547363, "eval_runtime": 21.3493, "eval_samples_per_second": 23.42, "eval_steps_per_second": 2.951, "eval_wer": 1.0, "step": 3700 }, { "epoch": 26.95035460992908, "eval_cer": 0.9850705754614549, "eval_loss": 2.971196174621582, "eval_runtime": 20.8572, "eval_samples_per_second": 23.973, "eval_steps_per_second": 3.021, "eval_wer": 1.0, "step": 3800 }, { "epoch": 27.659574468085108, "eval_cer": 0.9850705754614549, "eval_loss": 2.970116138458252, "eval_runtime": 21.7187, "eval_samples_per_second": 23.022, "eval_steps_per_second": 2.901, "eval_wer": 1.0, "step": 3900 }, { "epoch": 28.368794326241133, "grad_norm": 0.41945821046829224, "learning_rate": 2.8791278517630856e-06, "loss": 2.962, "step": 4000 }, { "epoch": 28.368794326241133, "eval_cer": 0.9851308963686813, "eval_loss": 2.9688799381256104, "eval_runtime": 21.2529, "eval_samples_per_second": 23.526, "eval_steps_per_second": 2.964, "eval_wer": 1.0, "step": 4000 }, { "epoch": 29.078014184397162, "eval_cer": 0.9850404150078417, "eval_loss": 2.973837375640869, "eval_runtime": 21.0065, "eval_samples_per_second": 23.802, "eval_steps_per_second": 2.999, "eval_wer": 1.0, "step": 4100 }, { "epoch": 29.78723404255319, "eval_cer": 0.9851308963686813, "eval_loss": 2.9678163528442383, "eval_runtime": 21.5034, "eval_samples_per_second": 23.252, "eval_steps_per_second": 2.93, "eval_wer": 1.0, "step": 4200 }, { "epoch": 30.0, "step": 4230, "total_flos": 6.12842473721103e+18, "train_loss": 3.08231661460642, "train_runtime": 8045.4114, "train_samples_per_second": 16.78, "train_steps_per_second": 0.526 } ], "logging_steps": 500, "max_steps": 4230, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.12842473721103e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }