|
{ |
|
"best_metric": 0.5324379205703735, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-balanced-model/checkpoint-2300", |
|
"epoch": 21.13821138211382, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 2.741861343383789, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 6.6518, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 3.1670827865600586, |
|
"eval_runtime": 20.9246, |
|
"eval_samples_per_second": 13.142, |
|
"eval_steps_per_second": 3.298, |
|
"eval_wer": 0.9942789034564958, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 1.7816331386566162, |
|
"learning_rate": 0.0002919777158774373, |
|
"loss": 2.6718, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 2.2608625888824463, |
|
"eval_runtime": 20.9525, |
|
"eval_samples_per_second": 13.125, |
|
"eval_steps_per_second": 3.293, |
|
"eval_wer": 0.931585220500596, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 1.4348615407943726, |
|
"learning_rate": 0.0002836211699164345, |
|
"loss": 1.4567, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"eval_loss": 0.740405261516571, |
|
"eval_runtime": 20.9256, |
|
"eval_samples_per_second": 13.142, |
|
"eval_steps_per_second": 3.297, |
|
"eval_wer": 0.7191895113230036, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"grad_norm": 2.2145164012908936, |
|
"learning_rate": 0.0002752646239554317, |
|
"loss": 0.7044, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"eval_loss": 0.6401560306549072, |
|
"eval_runtime": 20.8861, |
|
"eval_samples_per_second": 13.167, |
|
"eval_steps_per_second": 3.304, |
|
"eval_wer": 0.532061978545888, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"grad_norm": 1.0280542373657227, |
|
"learning_rate": 0.00026690807799442895, |
|
"loss": 0.6221, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"eval_loss": 0.6065016984939575, |
|
"eval_runtime": 20.9642, |
|
"eval_samples_per_second": 13.118, |
|
"eval_steps_per_second": 3.291, |
|
"eval_wer": 0.5165673420738975, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"grad_norm": 1.9754825830459595, |
|
"learning_rate": 0.00025855153203342614, |
|
"loss": 0.6016, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"eval_loss": 0.5947937965393066, |
|
"eval_runtime": 20.9641, |
|
"eval_samples_per_second": 13.118, |
|
"eval_steps_per_second": 3.291, |
|
"eval_wer": 0.4786650774731824, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.691056910569106, |
|
"grad_norm": 14.851791381835938, |
|
"learning_rate": 0.0002502785515320334, |
|
"loss": 0.5686, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.691056910569106, |
|
"eval_loss": 0.580583393573761, |
|
"eval_runtime": 20.7413, |
|
"eval_samples_per_second": 13.259, |
|
"eval_steps_per_second": 3.327, |
|
"eval_wer": 0.4641239570917759, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.504065040650406, |
|
"grad_norm": 0.918157696723938, |
|
"learning_rate": 0.00024192200557103064, |
|
"loss": 0.6054, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.504065040650406, |
|
"eval_loss": 0.5715970396995544, |
|
"eval_runtime": 20.8551, |
|
"eval_samples_per_second": 13.186, |
|
"eval_steps_per_second": 3.309, |
|
"eval_wer": 0.44862932061978544, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.317073170731708, |
|
"grad_norm": 1.2295010089874268, |
|
"learning_rate": 0.00023356545961002783, |
|
"loss": 0.4871, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.317073170731708, |
|
"eval_loss": 0.5732107162475586, |
|
"eval_runtime": 20.9032, |
|
"eval_samples_per_second": 13.156, |
|
"eval_steps_per_second": 3.301, |
|
"eval_wer": 0.4445768772348033, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.130081300813009, |
|
"grad_norm": 1.2786221504211426, |
|
"learning_rate": 0.00022520891364902505, |
|
"loss": 0.5275, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.130081300813009, |
|
"eval_loss": 0.5667155385017395, |
|
"eval_runtime": 20.9254, |
|
"eval_samples_per_second": 13.142, |
|
"eval_steps_per_second": 3.297, |
|
"eval_wer": 0.43504171632896305, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.94308943089431, |
|
"grad_norm": 2.170154571533203, |
|
"learning_rate": 0.00021685236768802224, |
|
"loss": 0.5199, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.94308943089431, |
|
"eval_loss": 0.5688361525535583, |
|
"eval_runtime": 20.7636, |
|
"eval_samples_per_second": 13.244, |
|
"eval_steps_per_second": 3.323, |
|
"eval_wer": 0.4300357568533969, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.75609756097561, |
|
"grad_norm": 0.6021554470062256, |
|
"learning_rate": 0.0002084958217270195, |
|
"loss": 0.5031, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.75609756097561, |
|
"eval_loss": 0.5516279935836792, |
|
"eval_runtime": 20.7896, |
|
"eval_samples_per_second": 13.228, |
|
"eval_steps_per_second": 3.319, |
|
"eval_wer": 0.4443384982121573, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.56910569105691, |
|
"grad_norm": 0.8330698609352112, |
|
"learning_rate": 0.00020013927576601671, |
|
"loss": 0.4533, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.56910569105691, |
|
"eval_loss": 0.557715117931366, |
|
"eval_runtime": 21.0144, |
|
"eval_samples_per_second": 13.086, |
|
"eval_steps_per_second": 3.283, |
|
"eval_wer": 0.41787842669845054, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.382113821138212, |
|
"grad_norm": 1.5642321109771729, |
|
"learning_rate": 0.0001917827298050139, |
|
"loss": 0.4738, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.382113821138212, |
|
"eval_loss": 0.553620457649231, |
|
"eval_runtime": 20.9609, |
|
"eval_samples_per_second": 13.12, |
|
"eval_steps_per_second": 3.292, |
|
"eval_wer": 0.4057210965435042, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"grad_norm": 6.336544513702393, |
|
"learning_rate": 0.00018342618384401113, |
|
"loss": 0.4925, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"eval_loss": 0.5502671003341675, |
|
"eval_runtime": 20.8854, |
|
"eval_samples_per_second": 13.167, |
|
"eval_steps_per_second": 3.304, |
|
"eval_wer": 0.39690107270560193, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.008130081300813, |
|
"grad_norm": 1.8177986145019531, |
|
"learning_rate": 0.00017506963788300832, |
|
"loss": 0.441, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.008130081300813, |
|
"eval_loss": 0.5402783155441284, |
|
"eval_runtime": 20.8181, |
|
"eval_samples_per_second": 13.21, |
|
"eval_steps_per_second": 3.314, |
|
"eval_wer": 0.40047675804529204, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 13.821138211382113, |
|
"grad_norm": 0.8598074316978455, |
|
"learning_rate": 0.00016671309192200557, |
|
"loss": 0.4177, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.821138211382113, |
|
"eval_loss": 0.5563377737998962, |
|
"eval_runtime": 20.9575, |
|
"eval_samples_per_second": 13.122, |
|
"eval_steps_per_second": 3.292, |
|
"eval_wer": 0.3914183551847437, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 14.634146341463415, |
|
"grad_norm": 2.3619320392608643, |
|
"learning_rate": 0.00015835654596100277, |
|
"loss": 0.4589, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 14.634146341463415, |
|
"eval_loss": 0.5394493341445923, |
|
"eval_runtime": 21.0011, |
|
"eval_samples_per_second": 13.095, |
|
"eval_steps_per_second": 3.286, |
|
"eval_wer": 0.3876042908224076, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.447154471544716, |
|
"grad_norm": 3.2565107345581055, |
|
"learning_rate": 0.00015, |
|
"loss": 0.4131, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 15.447154471544716, |
|
"eval_loss": 0.5424718856811523, |
|
"eval_runtime": 20.8003, |
|
"eval_samples_per_second": 13.221, |
|
"eval_steps_per_second": 3.317, |
|
"eval_wer": 0.3957091775923719, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.260162601626018, |
|
"grad_norm": 1.814285397529602, |
|
"learning_rate": 0.0001416434540389972, |
|
"loss": 0.393, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.260162601626018, |
|
"eval_loss": 0.546908974647522, |
|
"eval_runtime": 20.881, |
|
"eval_samples_per_second": 13.17, |
|
"eval_steps_per_second": 3.304, |
|
"eval_wer": 0.3907032181168057, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.073170731707318, |
|
"grad_norm": 0.762374997138977, |
|
"learning_rate": 0.00013328690807799443, |
|
"loss": 0.4235, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.073170731707318, |
|
"eval_loss": 0.5356802940368652, |
|
"eval_runtime": 20.9384, |
|
"eval_samples_per_second": 13.134, |
|
"eval_steps_per_second": 3.295, |
|
"eval_wer": 0.3878426698450536, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.88617886178862, |
|
"grad_norm": 0.8089348077774048, |
|
"learning_rate": 0.00012493036211699162, |
|
"loss": 0.4113, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 17.88617886178862, |
|
"eval_loss": 0.5390728712081909, |
|
"eval_runtime": 20.9645, |
|
"eval_samples_per_second": 13.117, |
|
"eval_steps_per_second": 3.291, |
|
"eval_wer": 0.3802145411203814, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 18.69918699186992, |
|
"grad_norm": 1.4261620044708252, |
|
"learning_rate": 0.00011657381615598886, |
|
"loss": 0.3781, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 18.69918699186992, |
|
"eval_loss": 0.5324379205703735, |
|
"eval_runtime": 20.8837, |
|
"eval_samples_per_second": 13.168, |
|
"eval_steps_per_second": 3.304, |
|
"eval_wer": 0.3728247914183552, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 19.51219512195122, |
|
"grad_norm": 0.8106199502944946, |
|
"learning_rate": 0.00010821727019498607, |
|
"loss": 0.3706, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 19.51219512195122, |
|
"eval_loss": 0.5462723970413208, |
|
"eval_runtime": 20.8289, |
|
"eval_samples_per_second": 13.203, |
|
"eval_steps_per_second": 3.313, |
|
"eval_wer": 0.38259833134684146, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 20.32520325203252, |
|
"grad_norm": 1.8274589776992798, |
|
"learning_rate": 9.986072423398329e-05, |
|
"loss": 0.3617, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.32520325203252, |
|
"eval_loss": 0.5391108989715576, |
|
"eval_runtime": 21.0365, |
|
"eval_samples_per_second": 13.072, |
|
"eval_steps_per_second": 3.28, |
|
"eval_wer": 0.3697258641239571, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 21.13821138211382, |
|
"grad_norm": 2.291131019592285, |
|
"learning_rate": 9.15041782729805e-05, |
|
"loss": 0.401, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 21.13821138211382, |
|
"eval_loss": 0.5417346358299255, |
|
"eval_runtime": 21.0259, |
|
"eval_samples_per_second": 13.079, |
|
"eval_steps_per_second": 3.282, |
|
"eval_wer": 0.366388557806913, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 21.13821138211382, |
|
"step": 2600, |
|
"total_flos": 7.953394513554737e+18, |
|
"train_loss": 0.8388280222966121, |
|
"train_runtime": 2359.9883, |
|
"train_samples_per_second": 6.242, |
|
"train_steps_per_second": 1.564 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3690, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.953394513554737e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|