{ "best_metric": 0.5324379205703735, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-balanced-model/checkpoint-2300", "epoch": 21.13821138211382, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8130081300813008, "grad_norm": 2.741861343383789, "learning_rate": 0.00028799999999999995, "loss": 6.6518, "step": 100 }, { "epoch": 0.8130081300813008, "eval_loss": 3.1670827865600586, "eval_runtime": 20.9246, "eval_samples_per_second": 13.142, "eval_steps_per_second": 3.298, "eval_wer": 0.9942789034564958, "step": 100 }, { "epoch": 1.6260162601626016, "grad_norm": 1.7816331386566162, "learning_rate": 0.0002919777158774373, "loss": 2.6718, "step": 200 }, { "epoch": 1.6260162601626016, "eval_loss": 2.2608625888824463, "eval_runtime": 20.9525, "eval_samples_per_second": 13.125, "eval_steps_per_second": 3.293, "eval_wer": 0.931585220500596, "step": 200 }, { "epoch": 2.4390243902439024, "grad_norm": 1.4348615407943726, "learning_rate": 0.0002836211699164345, "loss": 1.4567, "step": 300 }, { "epoch": 2.4390243902439024, "eval_loss": 0.740405261516571, "eval_runtime": 20.9256, "eval_samples_per_second": 13.142, "eval_steps_per_second": 3.297, "eval_wer": 0.7191895113230036, "step": 300 }, { "epoch": 3.252032520325203, "grad_norm": 2.2145164012908936, "learning_rate": 0.0002752646239554317, "loss": 0.7044, "step": 400 }, { "epoch": 3.252032520325203, "eval_loss": 0.6401560306549072, "eval_runtime": 20.8861, "eval_samples_per_second": 13.167, "eval_steps_per_second": 3.304, "eval_wer": 0.532061978545888, "step": 400 }, { "epoch": 4.065040650406504, "grad_norm": 1.0280542373657227, "learning_rate": 0.00026690807799442895, "loss": 0.6221, "step": 500 }, { "epoch": 4.065040650406504, "eval_loss": 0.6065016984939575, "eval_runtime": 20.9642, "eval_samples_per_second": 13.118, "eval_steps_per_second": 3.291, "eval_wer": 0.5165673420738975, "step": 500 }, { "epoch": 4.878048780487805, "grad_norm": 1.9754825830459595, "learning_rate": 0.00025855153203342614, "loss": 0.6016, "step": 600 }, { "epoch": 4.878048780487805, "eval_loss": 0.5947937965393066, "eval_runtime": 20.9641, "eval_samples_per_second": 13.118, "eval_steps_per_second": 3.291, "eval_wer": 0.4786650774731824, "step": 600 }, { "epoch": 5.691056910569106, "grad_norm": 14.851791381835938, "learning_rate": 0.0002502785515320334, "loss": 0.5686, "step": 700 }, { "epoch": 5.691056910569106, "eval_loss": 0.580583393573761, "eval_runtime": 20.7413, "eval_samples_per_second": 13.259, "eval_steps_per_second": 3.327, "eval_wer": 0.4641239570917759, "step": 700 }, { "epoch": 6.504065040650406, "grad_norm": 0.918157696723938, "learning_rate": 0.00024192200557103064, "loss": 0.6054, "step": 800 }, { "epoch": 6.504065040650406, "eval_loss": 0.5715970396995544, "eval_runtime": 20.8551, "eval_samples_per_second": 13.186, "eval_steps_per_second": 3.309, "eval_wer": 0.44862932061978544, "step": 800 }, { "epoch": 7.317073170731708, "grad_norm": 1.2295010089874268, "learning_rate": 0.00023356545961002783, "loss": 0.4871, "step": 900 }, { "epoch": 7.317073170731708, "eval_loss": 0.5732107162475586, "eval_runtime": 20.9032, "eval_samples_per_second": 13.156, "eval_steps_per_second": 3.301, "eval_wer": 0.4445768772348033, "step": 900 }, { "epoch": 8.130081300813009, "grad_norm": 1.2786221504211426, "learning_rate": 0.00022520891364902505, "loss": 0.5275, "step": 1000 }, { "epoch": 8.130081300813009, "eval_loss": 0.5667155385017395, "eval_runtime": 20.9254, "eval_samples_per_second": 13.142, "eval_steps_per_second": 3.297, "eval_wer": 0.43504171632896305, "step": 1000 }, { "epoch": 8.94308943089431, "grad_norm": 2.170154571533203, "learning_rate": 0.00021685236768802224, "loss": 0.5199, "step": 1100 }, { "epoch": 8.94308943089431, "eval_loss": 0.5688361525535583, "eval_runtime": 20.7636, "eval_samples_per_second": 13.244, "eval_steps_per_second": 3.323, "eval_wer": 0.4300357568533969, "step": 1100 }, { "epoch": 9.75609756097561, "grad_norm": 0.6021554470062256, "learning_rate": 0.0002084958217270195, "loss": 0.5031, "step": 1200 }, { "epoch": 9.75609756097561, "eval_loss": 0.5516279935836792, "eval_runtime": 20.7896, "eval_samples_per_second": 13.228, "eval_steps_per_second": 3.319, "eval_wer": 0.4443384982121573, "step": 1200 }, { "epoch": 10.56910569105691, "grad_norm": 0.8330698609352112, "learning_rate": 0.00020013927576601671, "loss": 0.4533, "step": 1300 }, { "epoch": 10.56910569105691, "eval_loss": 0.557715117931366, "eval_runtime": 21.0144, "eval_samples_per_second": 13.086, "eval_steps_per_second": 3.283, "eval_wer": 0.41787842669845054, "step": 1300 }, { "epoch": 11.382113821138212, "grad_norm": 1.5642321109771729, "learning_rate": 0.0001917827298050139, "loss": 0.4738, "step": 1400 }, { "epoch": 11.382113821138212, "eval_loss": 0.553620457649231, "eval_runtime": 20.9609, "eval_samples_per_second": 13.12, "eval_steps_per_second": 3.292, "eval_wer": 0.4057210965435042, "step": 1400 }, { "epoch": 12.195121951219512, "grad_norm": 6.336544513702393, "learning_rate": 0.00018342618384401113, "loss": 0.4925, "step": 1500 }, { "epoch": 12.195121951219512, "eval_loss": 0.5502671003341675, "eval_runtime": 20.8854, "eval_samples_per_second": 13.167, "eval_steps_per_second": 3.304, "eval_wer": 0.39690107270560193, "step": 1500 }, { "epoch": 13.008130081300813, "grad_norm": 1.8177986145019531, "learning_rate": 0.00017506963788300832, "loss": 0.441, "step": 1600 }, { "epoch": 13.008130081300813, "eval_loss": 0.5402783155441284, "eval_runtime": 20.8181, "eval_samples_per_second": 13.21, "eval_steps_per_second": 3.314, "eval_wer": 0.40047675804529204, "step": 1600 }, { "epoch": 13.821138211382113, "grad_norm": 0.8598074316978455, "learning_rate": 0.00016671309192200557, "loss": 0.4177, "step": 1700 }, { "epoch": 13.821138211382113, "eval_loss": 0.5563377737998962, "eval_runtime": 20.9575, "eval_samples_per_second": 13.122, "eval_steps_per_second": 3.292, "eval_wer": 0.3914183551847437, "step": 1700 }, { "epoch": 14.634146341463415, "grad_norm": 2.3619320392608643, "learning_rate": 0.00015835654596100277, "loss": 0.4589, "step": 1800 }, { "epoch": 14.634146341463415, "eval_loss": 0.5394493341445923, "eval_runtime": 21.0011, "eval_samples_per_second": 13.095, "eval_steps_per_second": 3.286, "eval_wer": 0.3876042908224076, "step": 1800 }, { "epoch": 15.447154471544716, "grad_norm": 3.2565107345581055, "learning_rate": 0.00015, "loss": 0.4131, "step": 1900 }, { "epoch": 15.447154471544716, "eval_loss": 0.5424718856811523, "eval_runtime": 20.8003, "eval_samples_per_second": 13.221, "eval_steps_per_second": 3.317, "eval_wer": 0.3957091775923719, "step": 1900 }, { "epoch": 16.260162601626018, "grad_norm": 1.814285397529602, "learning_rate": 0.0001416434540389972, "loss": 0.393, "step": 2000 }, { "epoch": 16.260162601626018, "eval_loss": 0.546908974647522, "eval_runtime": 20.881, "eval_samples_per_second": 13.17, "eval_steps_per_second": 3.304, "eval_wer": 0.3907032181168057, "step": 2000 }, { "epoch": 17.073170731707318, "grad_norm": 0.762374997138977, "learning_rate": 0.00013328690807799443, "loss": 0.4235, "step": 2100 }, { "epoch": 17.073170731707318, "eval_loss": 0.5356802940368652, "eval_runtime": 20.9384, "eval_samples_per_second": 13.134, "eval_steps_per_second": 3.295, "eval_wer": 0.3878426698450536, "step": 2100 }, { "epoch": 17.88617886178862, "grad_norm": 0.8089348077774048, "learning_rate": 0.00012493036211699162, "loss": 0.4113, "step": 2200 }, { "epoch": 17.88617886178862, "eval_loss": 0.5390728712081909, "eval_runtime": 20.9645, "eval_samples_per_second": 13.117, "eval_steps_per_second": 3.291, "eval_wer": 0.3802145411203814, "step": 2200 }, { "epoch": 18.69918699186992, "grad_norm": 1.4261620044708252, "learning_rate": 0.00011657381615598886, "loss": 0.3781, "step": 2300 }, { "epoch": 18.69918699186992, "eval_loss": 0.5324379205703735, "eval_runtime": 20.8837, "eval_samples_per_second": 13.168, "eval_steps_per_second": 3.304, "eval_wer": 0.3728247914183552, "step": 2300 }, { "epoch": 19.51219512195122, "grad_norm": 0.8106199502944946, "learning_rate": 0.00010821727019498607, "loss": 0.3706, "step": 2400 }, { "epoch": 19.51219512195122, "eval_loss": 0.5462723970413208, "eval_runtime": 20.8289, "eval_samples_per_second": 13.203, "eval_steps_per_second": 3.313, "eval_wer": 0.38259833134684146, "step": 2400 }, { "epoch": 20.32520325203252, "grad_norm": 1.8274589776992798, "learning_rate": 9.986072423398329e-05, "loss": 0.3617, "step": 2500 }, { "epoch": 20.32520325203252, "eval_loss": 0.5391108989715576, "eval_runtime": 21.0365, "eval_samples_per_second": 13.072, "eval_steps_per_second": 3.28, "eval_wer": 0.3697258641239571, "step": 2500 }, { "epoch": 21.13821138211382, "grad_norm": 2.291131019592285, "learning_rate": 9.15041782729805e-05, "loss": 0.401, "step": 2600 }, { "epoch": 21.13821138211382, "eval_loss": 0.5417346358299255, "eval_runtime": 21.0259, "eval_samples_per_second": 13.079, "eval_steps_per_second": 3.282, "eval_wer": 0.366388557806913, "step": 2600 }, { "epoch": 21.13821138211382, "step": 2600, "total_flos": 7.953394513554737e+18, "train_loss": 0.8388280222966121, "train_runtime": 2359.9883, "train_samples_per_second": 6.242, "train_steps_per_second": 1.564 } ], "logging_steps": 100, "max_steps": 3690, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.953394513554737e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }