|
{
  "best_metric": 0.5884432792663574,
  "best_model_checkpoint": "/scratch/skscla001/results/mms-1b-all-bem-natbed-nn-model/checkpoint-2200",
  "epoch": 6.320541760722348,
  "eval_steps": 100,
  "global_step": 2800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22573363431151242,
      "grad_norm": 1.8729794025421143,
      "learning_rate": 0.00028799999999999995,
      "loss": 7.9244,
      "step": 100
    },
    {
      "epoch": 0.22573363431151242,
      "eval_loss": 1.35137140750885,
      "eval_runtime": 45.1782,
      "eval_samples_per_second": 15.671,
      "eval_steps_per_second": 1.97,
      "eval_wer": 1.0237771739130435,
      "step": 100
    },
    {
      "epoch": 0.45146726862302483,
      "grad_norm": 2.6380980014801025,
      "learning_rate": 0.0002978165276724791,
      "loss": 1.0236,
      "step": 200
    },
    {
      "epoch": 0.45146726862302483,
      "eval_loss": 0.8354936838150024,
      "eval_runtime": 44.7017,
      "eval_samples_per_second": 15.838,
      "eval_steps_per_second": 1.991,
      "eval_wer": 0.6594769021739131,
      "step": 200
    },
    {
      "epoch": 0.6772009029345373,
      "grad_norm": 1.9870244264602661,
      "learning_rate": 0.0002955420773313116,
      "loss": 0.8005,
      "step": 300
    },
    {
      "epoch": 0.6772009029345373,
      "eval_loss": 0.7836518287658691,
      "eval_runtime": 45.0039,
      "eval_samples_per_second": 15.732,
      "eval_steps_per_second": 1.978,
      "eval_wer": 0.6141304347826086,
      "step": 300
    },
    {
      "epoch": 0.9029345372460497,
      "grad_norm": 2.012474536895752,
      "learning_rate": 0.00029326762699014404,
      "loss": 0.8968,
      "step": 400
    },
    {
      "epoch": 0.9029345372460497,
      "eval_loss": 0.7808529138565063,
      "eval_runtime": 44.7704,
      "eval_samples_per_second": 15.814,
      "eval_steps_per_second": 1.988,
      "eval_wer": 0.6042798913043478,
      "step": 400
    },
    {
      "epoch": 1.1286681715575622,
      "grad_norm": 8.587899208068848,
      "learning_rate": 0.00029099317664897645,
      "loss": 0.8909,
      "step": 500
    },
    {
      "epoch": 1.1286681715575622,
      "eval_loss": 0.7146816849708557,
      "eval_runtime": 44.5851,
      "eval_samples_per_second": 15.88,
      "eval_steps_per_second": 1.996,
      "eval_wer": 0.5952785326086957,
      "step": 500
    },
    {
      "epoch": 1.3544018058690745,
      "grad_norm": 8.078141212463379,
      "learning_rate": 0.0002887187263078089,
      "loss": 0.7983,
      "step": 600
    },
    {
      "epoch": 1.3544018058690745,
      "eval_loss": 0.6989510655403137,
      "eval_runtime": 44.4701,
      "eval_samples_per_second": 15.921,
      "eval_steps_per_second": 2.001,
      "eval_wer": 0.5930706521739131,
      "step": 600
    },
    {
      "epoch": 1.580135440180587,
      "grad_norm": 2.5096583366394043,
      "learning_rate": 0.00028644427596664137,
      "loss": 0.8563,
      "step": 700
    },
    {
      "epoch": 1.580135440180587,
      "eval_loss": 0.6804757118225098,
      "eval_runtime": 44.8305,
      "eval_samples_per_second": 15.793,
      "eval_steps_per_second": 1.985,
      "eval_wer": 0.5964673913043478,
      "step": 700
    },
    {
      "epoch": 1.8058690744920993,
      "grad_norm": 2.3668673038482666,
      "learning_rate": 0.00028416982562547383,
      "loss": 0.7094,
      "step": 800
    },
    {
      "epoch": 1.8058690744920993,
      "eval_loss": 0.6849333047866821,
      "eval_runtime": 44.6427,
      "eval_samples_per_second": 15.859,
      "eval_steps_per_second": 1.994,
      "eval_wer": 0.5808423913043478,
      "step": 800
    },
    {
      "epoch": 2.0316027088036117,
      "grad_norm": 1.9050077199935913,
      "learning_rate": 0.0002819181197877179,
      "loss": 0.7499,
      "step": 900
    },
    {
      "epoch": 2.0316027088036117,
      "eval_loss": 0.6456880569458008,
      "eval_runtime": 44.6644,
      "eval_samples_per_second": 15.852,
      "eval_steps_per_second": 1.993,
      "eval_wer": 0.5934103260869565,
      "step": 900
    },
    {
      "epoch": 2.2573363431151243,
      "grad_norm": 2.829195737838745,
      "learning_rate": 0.0002796436694465504,
      "loss": 0.7722,
      "step": 1000
    },
    {
      "epoch": 2.2573363431151243,
      "eval_loss": 0.6565266251564026,
      "eval_runtime": 44.7397,
      "eval_samples_per_second": 15.825,
      "eval_steps_per_second": 1.989,
      "eval_wer": 0.5874660326086957,
      "step": 1000
    },
    {
      "epoch": 2.4830699774266365,
      "grad_norm": 0.8484971523284912,
      "learning_rate": 0.00027736921910538284,
      "loss": 0.7099,
      "step": 1100
    },
    {
      "epoch": 2.4830699774266365,
      "eval_loss": 0.6419216394424438,
      "eval_runtime": 44.7023,
      "eval_samples_per_second": 15.838,
      "eval_steps_per_second": 1.991,
      "eval_wer": 0.5596127717391305,
      "step": 1100
    },
    {
      "epoch": 2.708803611738149,
      "grad_norm": 2.4867944717407227,
      "learning_rate": 0.0002750947687642153,
      "loss": 0.7416,
      "step": 1200
    },
    {
      "epoch": 2.708803611738149,
      "eval_loss": 0.6195096373558044,
      "eval_runtime": 44.7966,
      "eval_samples_per_second": 15.805,
      "eval_steps_per_second": 1.987,
      "eval_wer": 0.561141304347826,
      "step": 1200
    },
    {
      "epoch": 2.9345372460496613,
      "grad_norm": 4.84053373336792,
      "learning_rate": 0.00027282031842304776,
      "loss": 0.6385,
      "step": 1300
    },
    {
      "epoch": 2.9345372460496613,
      "eval_loss": 0.6227733492851257,
      "eval_runtime": 44.9228,
      "eval_samples_per_second": 15.76,
      "eval_steps_per_second": 1.981,
      "eval_wer": 0.5647078804347826,
      "step": 1300
    },
    {
      "epoch": 3.160270880361174,
      "grad_norm": 0.9543440341949463,
      "learning_rate": 0.0002705458680818802,
      "loss": 0.6436,
      "step": 1400
    },
    {
      "epoch": 3.160270880361174,
      "eval_loss": 0.6184154152870178,
      "eval_runtime": 44.7251,
      "eval_samples_per_second": 15.83,
      "eval_steps_per_second": 1.99,
      "eval_wer": 0.5509510869565217,
      "step": 1400
    },
    {
      "epoch": 3.386004514672686,
      "grad_norm": 0.8133373856544495,
      "learning_rate": 0.0002682714177407126,
      "loss": 0.6795,
      "step": 1500
    },
    {
      "epoch": 3.386004514672686,
      "eval_loss": 0.6156527996063232,
      "eval_runtime": 45.3372,
      "eval_samples_per_second": 15.616,
      "eval_steps_per_second": 1.963,
      "eval_wer": 0.553328804347826,
      "step": 1500
    },
    {
      "epoch": 3.6117381489841986,
      "grad_norm": 25.58840560913086,
      "learning_rate": 0.0002659969673995451,
      "loss": 0.7027,
      "step": 1600
    },
    {
      "epoch": 3.6117381489841986,
      "eval_loss": 0.6343082785606384,
      "eval_runtime": 45.529,
      "eval_samples_per_second": 15.551,
      "eval_steps_per_second": 1.955,
      "eval_wer": 0.5426290760869565,
      "step": 1600
    },
    {
      "epoch": 3.837471783295711,
      "grad_norm": 0.6009318828582764,
      "learning_rate": 0.00026372251705837754,
      "loss": 0.6585,
      "step": 1700
    },
    {
      "epoch": 3.837471783295711,
      "eval_loss": 0.6057115793228149,
      "eval_runtime": 44.8336,
      "eval_samples_per_second": 15.792,
      "eval_steps_per_second": 1.985,
      "eval_wer": 0.5427989130434783,
      "step": 1700
    },
    {
      "epoch": 4.063205417607223,
      "grad_norm": 0.929165244102478,
      "learning_rate": 0.00026144806671720994,
      "loss": 0.6351,
      "step": 1800
    },
    {
      "epoch": 4.063205417607223,
      "eval_loss": 0.6017059683799744,
      "eval_runtime": 44.5067,
      "eval_samples_per_second": 15.908,
      "eval_steps_per_second": 2.0,
      "eval_wer": 0.54296875,
      "step": 1800
    },
    {
      "epoch": 4.288939051918736,
      "grad_norm": 1.4761062860488892,
      "learning_rate": 0.00025917361637604246,
      "loss": 0.6528,
      "step": 1900
    },
    {
      "epoch": 4.288939051918736,
      "eval_loss": 0.6098975539207458,
      "eval_runtime": 44.8754,
      "eval_samples_per_second": 15.777,
      "eval_steps_per_second": 1.983,
      "eval_wer": 0.5339673913043478,
      "step": 1900
    },
    {
      "epoch": 4.514672686230249,
      "grad_norm": 1.2957922220230103,
      "learning_rate": 0.00025689916603487486,
      "loss": 0.6603,
      "step": 2000
    },
    {
      "epoch": 4.514672686230249,
      "eval_loss": 0.621790885925293,
      "eval_runtime": 45.2703,
      "eval_samples_per_second": 15.639,
      "eval_steps_per_second": 1.966,
      "eval_wer": 0.5334578804347826,
      "step": 2000
    },
    {
      "epoch": 4.74040632054176,
      "grad_norm": 3.225343942642212,
      "learning_rate": 0.0002546247156937073,
      "loss": 0.6676,
      "step": 2100
    },
    {
      "epoch": 4.74040632054176,
      "eval_loss": 0.5977216958999634,
      "eval_runtime": 44.9573,
      "eval_samples_per_second": 15.748,
      "eval_steps_per_second": 1.98,
      "eval_wer": 0.5322690217391305,
      "step": 2100
    },
    {
      "epoch": 4.966139954853273,
      "grad_norm": 1.4750922918319702,
      "learning_rate": 0.0002523502653525398,
      "loss": 0.6304,
      "step": 2200
    },
    {
      "epoch": 4.966139954853273,
      "eval_loss": 0.5884432792663574,
      "eval_runtime": 45.456,
      "eval_samples_per_second": 15.575,
      "eval_steps_per_second": 1.958,
      "eval_wer": 0.5332880434782609,
      "step": 2200
    },
    {
      "epoch": 5.191873589164786,
      "grad_norm": 0.7652086615562439,
      "learning_rate": 0.00025007581501137224,
      "loss": 0.5976,
      "step": 2300
    },
    {
      "epoch": 5.191873589164786,
      "eval_loss": 0.5955621600151062,
      "eval_runtime": 45.3065,
      "eval_samples_per_second": 15.627,
      "eval_steps_per_second": 1.964,
      "eval_wer": 0.5227581521739131,
      "step": 2300
    },
    {
      "epoch": 5.417607223476298,
      "grad_norm": 3.0203540325164795,
      "learning_rate": 0.00024780136467020465,
      "loss": 0.6564,
      "step": 2400
    },
    {
      "epoch": 5.417607223476298,
      "eval_loss": 0.5956509709358215,
      "eval_runtime": 45.0964,
      "eval_samples_per_second": 15.7,
      "eval_steps_per_second": 1.974,
      "eval_wer": 0.5302309782608695,
      "step": 2400
    },
    {
      "epoch": 5.643340857787811,
      "grad_norm": 1.8780221939086914,
      "learning_rate": 0.00024552691432903716,
      "loss": 0.6717,
      "step": 2500
    },
    {
      "epoch": 5.643340857787811,
      "eval_loss": 0.5767239332199097,
      "eval_runtime": 44.9865,
      "eval_samples_per_second": 15.738,
      "eval_steps_per_second": 1.978,
      "eval_wer": 0.5183423913043478,
      "step": 2500
    },
    {
      "epoch": 5.8690744920993225,
      "grad_norm": 0.8542383909225464,
      "learning_rate": 0.00024325246398786956,
      "loss": 0.6091,
      "step": 2600
    },
    {
      "epoch": 5.8690744920993225,
      "eval_loss": 0.592084527015686,
      "eval_runtime": 44.6533,
      "eval_samples_per_second": 15.855,
      "eval_steps_per_second": 1.993,
      "eval_wer": 0.52734375,
      "step": 2600
    },
    {
      "epoch": 6.094808126410835,
      "grad_norm": 1.4033461809158325,
      "learning_rate": 0.00024097801364670205,
      "loss": 0.6168,
      "step": 2700
    },
    {
      "epoch": 6.094808126410835,
      "eval_loss": 0.5894186496734619,
      "eval_runtime": 45.0539,
      "eval_samples_per_second": 15.715,
      "eval_steps_per_second": 1.975,
      "eval_wer": 0.5275135869565217,
      "step": 2700
    },
    {
      "epoch": 6.320541760722348,
      "grad_norm": 10.08028507232666,
      "learning_rate": 0.00023870356330553448,
      "loss": 0.6495,
      "step": 2800
    },
    {
      "epoch": 6.320541760722348,
      "eval_loss": 0.6036040782928467,
      "eval_runtime": 45.0407,
      "eval_samples_per_second": 15.719,
      "eval_steps_per_second": 1.976,
      "eval_wer": 0.5197010869565217,
      "step": 2800
    },
    {
      "epoch": 6.320541760722348,
      "step": 2800,
      "total_flos": 1.3313268682658783e+19,
      "train_loss": 0.972996187210083,
      "train_runtime": 3933.908,
      "train_samples_per_second": 27.011,
      "train_steps_per_second": 3.378
    }
  ],
  "logging_steps": 100,
  "max_steps": 13290,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3313268682658783e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|