{ "best_metric": 0.5884432792663574, "best_model_checkpoint": "/scratch/skscla001/results/mms-1b-all-bem-natbed-nn-model/checkpoint-2200", "epoch": 6.320541760722348, "eval_steps": 100, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22573363431151242, "grad_norm": 1.8729794025421143, "learning_rate": 0.00028799999999999995, "loss": 7.9244, "step": 100 }, { "epoch": 0.22573363431151242, "eval_loss": 1.35137140750885, "eval_runtime": 45.1782, "eval_samples_per_second": 15.671, "eval_steps_per_second": 1.97, "eval_wer": 1.0237771739130435, "step": 100 }, { "epoch": 0.45146726862302483, "grad_norm": 2.6380980014801025, "learning_rate": 0.0002978165276724791, "loss": 1.0236, "step": 200 }, { "epoch": 0.45146726862302483, "eval_loss": 0.8354936838150024, "eval_runtime": 44.7017, "eval_samples_per_second": 15.838, "eval_steps_per_second": 1.991, "eval_wer": 0.6594769021739131, "step": 200 }, { "epoch": 0.6772009029345373, "grad_norm": 1.9870244264602661, "learning_rate": 0.0002955420773313116, "loss": 0.8005, "step": 300 }, { "epoch": 0.6772009029345373, "eval_loss": 0.7836518287658691, "eval_runtime": 45.0039, "eval_samples_per_second": 15.732, "eval_steps_per_second": 1.978, "eval_wer": 0.6141304347826086, "step": 300 }, { "epoch": 0.9029345372460497, "grad_norm": 2.012474536895752, "learning_rate": 0.00029326762699014404, "loss": 0.8968, "step": 400 }, { "epoch": 0.9029345372460497, "eval_loss": 0.7808529138565063, "eval_runtime": 44.7704, "eval_samples_per_second": 15.814, "eval_steps_per_second": 1.988, "eval_wer": 0.6042798913043478, "step": 400 }, { "epoch": 1.1286681715575622, "grad_norm": 8.587899208068848, "learning_rate": 0.00029099317664897645, "loss": 0.8909, "step": 500 }, { "epoch": 1.1286681715575622, "eval_loss": 0.7146816849708557, "eval_runtime": 44.5851, "eval_samples_per_second": 15.88, "eval_steps_per_second": 1.996, "eval_wer": 0.5952785326086957, "step": 500 }, { "epoch": 1.3544018058690745, "grad_norm": 8.078141212463379, "learning_rate": 0.0002887187263078089, "loss": 0.7983, "step": 600 }, { "epoch": 1.3544018058690745, "eval_loss": 0.6989510655403137, "eval_runtime": 44.4701, "eval_samples_per_second": 15.921, "eval_steps_per_second": 2.001, "eval_wer": 0.5930706521739131, "step": 600 }, { "epoch": 1.580135440180587, "grad_norm": 2.5096583366394043, "learning_rate": 0.00028644427596664137, "loss": 0.8563, "step": 700 }, { "epoch": 1.580135440180587, "eval_loss": 0.6804757118225098, "eval_runtime": 44.8305, "eval_samples_per_second": 15.793, "eval_steps_per_second": 1.985, "eval_wer": 0.5964673913043478, "step": 700 }, { "epoch": 1.8058690744920993, "grad_norm": 2.3668673038482666, "learning_rate": 0.00028416982562547383, "loss": 0.7094, "step": 800 }, { "epoch": 1.8058690744920993, "eval_loss": 0.6849333047866821, "eval_runtime": 44.6427, "eval_samples_per_second": 15.859, "eval_steps_per_second": 1.994, "eval_wer": 0.5808423913043478, "step": 800 }, { "epoch": 2.0316027088036117, "grad_norm": 1.9050077199935913, "learning_rate": 0.0002819181197877179, "loss": 0.7499, "step": 900 }, { "epoch": 2.0316027088036117, "eval_loss": 0.6456880569458008, "eval_runtime": 44.6644, "eval_samples_per_second": 15.852, "eval_steps_per_second": 1.993, "eval_wer": 0.5934103260869565, "step": 900 }, { "epoch": 2.2573363431151243, "grad_norm": 2.829195737838745, "learning_rate": 0.0002796436694465504, "loss": 0.7722, "step": 1000 }, { "epoch": 2.2573363431151243, "eval_loss": 0.6565266251564026, "eval_runtime": 44.7397, "eval_samples_per_second": 15.825, "eval_steps_per_second": 1.989, "eval_wer": 0.5874660326086957, "step": 1000 }, { "epoch": 2.4830699774266365, "grad_norm": 0.8484971523284912, "learning_rate": 0.00027736921910538284, "loss": 0.7099, "step": 1100 }, { "epoch": 2.4830699774266365, "eval_loss": 0.6419216394424438, "eval_runtime": 44.7023, "eval_samples_per_second": 15.838, "eval_steps_per_second": 1.991, "eval_wer": 0.5596127717391305, "step": 1100 }, { "epoch": 2.708803611738149, "grad_norm": 2.4867944717407227, "learning_rate": 0.0002750947687642153, "loss": 0.7416, "step": 1200 }, { "epoch": 2.708803611738149, "eval_loss": 0.6195096373558044, "eval_runtime": 44.7966, "eval_samples_per_second": 15.805, "eval_steps_per_second": 1.987, "eval_wer": 0.561141304347826, "step": 1200 }, { "epoch": 2.9345372460496613, "grad_norm": 4.84053373336792, "learning_rate": 0.00027282031842304776, "loss": 0.6385, "step": 1300 }, { "epoch": 2.9345372460496613, "eval_loss": 0.6227733492851257, "eval_runtime": 44.9228, "eval_samples_per_second": 15.76, "eval_steps_per_second": 1.981, "eval_wer": 0.5647078804347826, "step": 1300 }, { "epoch": 3.160270880361174, "grad_norm": 0.9543440341949463, "learning_rate": 0.0002705458680818802, "loss": 0.6436, "step": 1400 }, { "epoch": 3.160270880361174, "eval_loss": 0.6184154152870178, "eval_runtime": 44.7251, "eval_samples_per_second": 15.83, "eval_steps_per_second": 1.99, "eval_wer": 0.5509510869565217, "step": 1400 }, { "epoch": 3.386004514672686, "grad_norm": 0.8133373856544495, "learning_rate": 0.0002682714177407126, "loss": 0.6795, "step": 1500 }, { "epoch": 3.386004514672686, "eval_loss": 0.6156527996063232, "eval_runtime": 45.3372, "eval_samples_per_second": 15.616, "eval_steps_per_second": 1.963, "eval_wer": 0.553328804347826, "step": 1500 }, { "epoch": 3.6117381489841986, "grad_norm": 25.58840560913086, "learning_rate": 0.0002659969673995451, "loss": 0.7027, "step": 1600 }, { "epoch": 3.6117381489841986, "eval_loss": 0.6343082785606384, "eval_runtime": 45.529, "eval_samples_per_second": 15.551, "eval_steps_per_second": 1.955, "eval_wer": 0.5426290760869565, "step": 1600 }, { "epoch": 3.837471783295711, "grad_norm": 0.6009318828582764, "learning_rate": 0.00026372251705837754, "loss": 0.6585, "step": 1700 }, { "epoch": 3.837471783295711, "eval_loss": 0.6057115793228149, "eval_runtime": 44.8336, "eval_samples_per_second": 15.792, "eval_steps_per_second": 1.985, "eval_wer": 0.5427989130434783, "step": 1700 }, { "epoch": 4.063205417607223, "grad_norm": 0.929165244102478, "learning_rate": 0.00026144806671720994, "loss": 0.6351, "step": 1800 }, { "epoch": 4.063205417607223, "eval_loss": 0.6017059683799744, "eval_runtime": 44.5067, "eval_samples_per_second": 15.908, "eval_steps_per_second": 2.0, "eval_wer": 0.54296875, "step": 1800 }, { "epoch": 4.288939051918736, "grad_norm": 1.4761062860488892, "learning_rate": 0.00025917361637604246, "loss": 0.6528, "step": 1900 }, { "epoch": 4.288939051918736, "eval_loss": 0.6098975539207458, "eval_runtime": 44.8754, "eval_samples_per_second": 15.777, "eval_steps_per_second": 1.983, "eval_wer": 0.5339673913043478, "step": 1900 }, { "epoch": 4.514672686230249, "grad_norm": 1.2957922220230103, "learning_rate": 0.00025689916603487486, "loss": 0.6603, "step": 2000 }, { "epoch": 4.514672686230249, "eval_loss": 0.621790885925293, "eval_runtime": 45.2703, "eval_samples_per_second": 15.639, "eval_steps_per_second": 1.966, "eval_wer": 0.5334578804347826, "step": 2000 }, { "epoch": 4.74040632054176, "grad_norm": 3.225343942642212, "learning_rate": 0.0002546247156937073, "loss": 0.6676, "step": 2100 }, { "epoch": 4.74040632054176, "eval_loss": 0.5977216958999634, "eval_runtime": 44.9573, "eval_samples_per_second": 15.748, "eval_steps_per_second": 1.98, "eval_wer": 0.5322690217391305, "step": 2100 }, { "epoch": 4.966139954853273, "grad_norm": 1.4750922918319702, "learning_rate": 0.0002523502653525398, "loss": 0.6304, "step": 2200 }, { "epoch": 4.966139954853273, "eval_loss": 0.5884432792663574, "eval_runtime": 45.456, "eval_samples_per_second": 15.575, "eval_steps_per_second": 1.958, "eval_wer": 0.5332880434782609, "step": 2200 }, { "epoch": 5.191873589164786, "grad_norm": 0.7652086615562439, "learning_rate": 0.00025007581501137224, "loss": 0.5976, "step": 2300 }, { "epoch": 5.191873589164786, "eval_loss": 0.5955621600151062, "eval_runtime": 45.3065, "eval_samples_per_second": 15.627, "eval_steps_per_second": 1.964, "eval_wer": 0.5227581521739131, "step": 2300 }, { "epoch": 5.417607223476298, "grad_norm": 3.0203540325164795, "learning_rate": 0.00024780136467020465, "loss": 0.6564, "step": 2400 }, { "epoch": 5.417607223476298, "eval_loss": 0.5956509709358215, "eval_runtime": 45.0964, "eval_samples_per_second": 15.7, "eval_steps_per_second": 1.974, "eval_wer": 0.5302309782608695, "step": 2400 }, { "epoch": 5.643340857787811, "grad_norm": 1.8780221939086914, "learning_rate": 0.00024552691432903716, "loss": 0.6717, "step": 2500 }, { "epoch": 5.643340857787811, "eval_loss": 0.5767239332199097, "eval_runtime": 44.9865, "eval_samples_per_second": 15.738, "eval_steps_per_second": 1.978, "eval_wer": 0.5183423913043478, "step": 2500 }, { "epoch": 5.8690744920993225, "grad_norm": 0.8542383909225464, "learning_rate": 0.00024325246398786956, "loss": 0.6091, "step": 2600 }, { "epoch": 5.8690744920993225, "eval_loss": 0.592084527015686, "eval_runtime": 44.6533, "eval_samples_per_second": 15.855, "eval_steps_per_second": 1.993, "eval_wer": 0.52734375, "step": 2600 }, { "epoch": 6.094808126410835, "grad_norm": 1.4033461809158325, "learning_rate": 0.00024097801364670205, "loss": 0.6168, "step": 2700 }, { "epoch": 6.094808126410835, "eval_loss": 0.5894186496734619, "eval_runtime": 45.0539, "eval_samples_per_second": 15.715, "eval_steps_per_second": 1.975, "eval_wer": 0.5275135869565217, "step": 2700 }, { "epoch": 6.320541760722348, "grad_norm": 10.08028507232666, "learning_rate": 0.00023870356330553448, "loss": 0.6495, "step": 2800 }, { "epoch": 6.320541760722348, "eval_loss": 0.6036040782928467, "eval_runtime": 45.0407, "eval_samples_per_second": 15.719, "eval_steps_per_second": 1.976, "eval_wer": 0.5197010869565217, "step": 2800 }, { "epoch": 6.320541760722348, "step": 2800, "total_flos": 1.3313268682658783e+19, "train_loss": 0.972996187210083, "train_runtime": 3933.908, "train_samples_per_second": 27.011, "train_steps_per_second": 3.378 } ], "logging_steps": 100, "max_steps": 13290, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3313268682658783e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }