|
{ |
|
"best_metric": 0.2750256657600403, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-balanced-model/checkpoint-1700", |
|
"epoch": 2.0618556701030926, |
|
"eval_steps": 100, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10309278350515463, |
|
"grad_norm": 2.7115845680236816, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 6.8327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10309278350515463, |
|
"eval_loss": 0.8479615449905396, |
|
"eval_runtime": 30.8075, |
|
"eval_samples_per_second": 16.003, |
|
"eval_steps_per_second": 4.025, |
|
"eval_wer": 0.788869640537029, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20618556701030927, |
|
"grad_norm": 2.6976242065429688, |
|
"learning_rate": 0.0002990068965517241, |
|
"loss": 0.5906, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20618556701030927, |
|
"eval_loss": 0.3704410493373871, |
|
"eval_runtime": 30.4245, |
|
"eval_samples_per_second": 16.204, |
|
"eval_steps_per_second": 4.076, |
|
"eval_wer": 0.5818536162841057, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.30927835051546393, |
|
"grad_norm": 3.159865379333496, |
|
"learning_rate": 0.00029797241379310343, |
|
"loss": 0.4809, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.30927835051546393, |
|
"eval_loss": 0.33272889256477356, |
|
"eval_runtime": 30.3459, |
|
"eval_samples_per_second": 16.246, |
|
"eval_steps_per_second": 4.086, |
|
"eval_wer": 0.503897791251624, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 2.7093732357025146, |
|
"learning_rate": 0.00029693793103448274, |
|
"loss": 0.4495, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"eval_loss": 0.3171828091144562, |
|
"eval_runtime": 30.503, |
|
"eval_samples_per_second": 16.162, |
|
"eval_steps_per_second": 4.065, |
|
"eval_wer": 0.48938934603724554, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"grad_norm": 2.6349027156829834, |
|
"learning_rate": 0.00029590344827586204, |
|
"loss": 0.4266, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"eval_loss": 0.3101615607738495, |
|
"eval_runtime": 30.7842, |
|
"eval_samples_per_second": 16.015, |
|
"eval_steps_per_second": 4.028, |
|
"eval_wer": 0.4631875270679948, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6185567010309279, |
|
"grad_norm": 19.215137481689453, |
|
"learning_rate": 0.00029486896551724135, |
|
"loss": 0.4167, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6185567010309279, |
|
"eval_loss": 0.30752459168434143, |
|
"eval_runtime": 30.6141, |
|
"eval_samples_per_second": 16.104, |
|
"eval_steps_per_second": 4.05, |
|
"eval_wer": 0.4716327414465136, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7216494845360825, |
|
"grad_norm": 1.4912749528884888, |
|
"learning_rate": 0.00029383448275862066, |
|
"loss": 0.4151, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7216494845360825, |
|
"eval_loss": 0.2996384799480438, |
|
"eval_runtime": 30.5121, |
|
"eval_samples_per_second": 16.158, |
|
"eval_steps_per_second": 4.064, |
|
"eval_wer": 0.48289302728453876, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 2.437255859375, |
|
"learning_rate": 0.00029279999999999996, |
|
"loss": 0.3955, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"eval_loss": 0.29851067066192627, |
|
"eval_runtime": 30.5988, |
|
"eval_samples_per_second": 16.112, |
|
"eval_steps_per_second": 4.052, |
|
"eval_wer": 0.47119965352966653, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9278350515463918, |
|
"grad_norm": 1.967859148979187, |
|
"learning_rate": 0.00029176551724137927, |
|
"loss": 0.3802, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9278350515463918, |
|
"eval_loss": 0.2959710359573364, |
|
"eval_runtime": 30.8901, |
|
"eval_samples_per_second": 15.96, |
|
"eval_steps_per_second": 4.014, |
|
"eval_wer": 0.49263750541359896, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 0.8750647902488708, |
|
"learning_rate": 0.00029073103448275857, |
|
"loss": 0.392, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"eval_loss": 0.28385233879089355, |
|
"eval_runtime": 30.857, |
|
"eval_samples_per_second": 15.977, |
|
"eval_steps_per_second": 4.019, |
|
"eval_wer": 0.43741879601559114, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.134020618556701, |
|
"grad_norm": 0.8763826489448547, |
|
"learning_rate": 0.0002896965517241379, |
|
"loss": 0.375, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.134020618556701, |
|
"eval_loss": 0.28370916843414307, |
|
"eval_runtime": 30.6261, |
|
"eval_samples_per_second": 16.097, |
|
"eval_steps_per_second": 4.049, |
|
"eval_wer": 0.4317886530965786, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 0.6871535181999207, |
|
"learning_rate": 0.0002886620689655172, |
|
"loss": 0.3885, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"eval_loss": 0.281158447265625, |
|
"eval_runtime": 30.5181, |
|
"eval_samples_per_second": 16.154, |
|
"eval_steps_per_second": 4.063, |
|
"eval_wer": 0.4257254222607189, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.3402061855670104, |
|
"grad_norm": 1.2791551351547241, |
|
"learning_rate": 0.00028762758620689654, |
|
"loss": 0.3824, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3402061855670104, |
|
"eval_loss": 0.282485693693161, |
|
"eval_runtime": 30.9775, |
|
"eval_samples_per_second": 15.915, |
|
"eval_steps_per_second": 4.003, |
|
"eval_wer": 0.4255088783022954, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.443298969072165, |
|
"grad_norm": 2.2985143661499023, |
|
"learning_rate": 0.00028659310344827585, |
|
"loss": 0.3906, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.443298969072165, |
|
"eval_loss": 0.2794145941734314, |
|
"eval_runtime": 30.8505, |
|
"eval_samples_per_second": 15.98, |
|
"eval_steps_per_second": 4.019, |
|
"eval_wer": 0.42897358163707233, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"grad_norm": 0.9774999618530273, |
|
"learning_rate": 0.00028555862068965516, |
|
"loss": 0.3465, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"eval_loss": 0.28065648674964905, |
|
"eval_runtime": 30.5852, |
|
"eval_samples_per_second": 16.119, |
|
"eval_steps_per_second": 4.054, |
|
"eval_wer": 0.42832394976180166, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 0.6370195746421814, |
|
"learning_rate": 0.00028452413793103446, |
|
"loss": 0.3564, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"eval_loss": 0.2772924304008484, |
|
"eval_runtime": 30.7481, |
|
"eval_samples_per_second": 16.034, |
|
"eval_steps_per_second": 4.033, |
|
"eval_wer": 0.4237765266349069, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.7525773195876289, |
|
"grad_norm": 0.919018566608429, |
|
"learning_rate": 0.00028348965517241377, |
|
"loss": 0.3617, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.7525773195876289, |
|
"eval_loss": 0.2750256657600403, |
|
"eval_runtime": 31.0143, |
|
"eval_samples_per_second": 15.896, |
|
"eval_steps_per_second": 3.998, |
|
"eval_wer": 0.4452143785188393, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.8556701030927836, |
|
"grad_norm": 1.0795314311981201, |
|
"learning_rate": 0.0002824551724137931, |
|
"loss": 0.3808, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8556701030927836, |
|
"eval_loss": 0.27826789021492004, |
|
"eval_runtime": 30.9277, |
|
"eval_samples_per_second": 15.94, |
|
"eval_steps_per_second": 4.009, |
|
"eval_wer": 0.42291035080121264, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.9587628865979383, |
|
"grad_norm": 1.087866187095642, |
|
"learning_rate": 0.0002814206896551724, |
|
"loss": 0.3661, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.9587628865979383, |
|
"eval_loss": 0.2761477828025818, |
|
"eval_runtime": 30.7566, |
|
"eval_samples_per_second": 16.029, |
|
"eval_steps_per_second": 4.032, |
|
"eval_wer": 0.4517106972715461, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 0.7742441892623901, |
|
"learning_rate": 0.0002803862068965517, |
|
"loss": 0.3952, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"eval_loss": 0.27530166506767273, |
|
"eval_runtime": 30.8874, |
|
"eval_samples_per_second": 15.961, |
|
"eval_steps_per_second": 4.015, |
|
"eval_wer": 0.4200952793417064, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"step": 2000, |
|
"total_flos": 4.819386981327346e+18, |
|
"train_loss": 0.7261434955596924, |
|
"train_runtime": 1981.0289, |
|
"train_samples_per_second": 58.727, |
|
"train_steps_per_second": 14.689 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 29100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.819386981327346e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|