|
{ |
|
"best_metric": 0.25605687499046326, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-nyagen-combined-model/checkpoint-1000", |
|
"epoch": 4.255659121171771, |
|
"eval_steps": 200, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06657789613848203, |
|
"grad_norm": 56.093265533447266, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 5.267, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13315579227696406, |
|
"grad_norm": 34.71885681152344, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 4.1657, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19973368841544606, |
|
"grad_norm": 31.17400360107422, |
|
"learning_rate": 1.42e-06, |
|
"loss": 3.1786, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2663115845539281, |
|
"grad_norm": 30.04043960571289, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 2.1171, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.33288948069241014, |
|
"grad_norm": 21.41140365600586, |
|
"learning_rate": 2.42e-06, |
|
"loss": 1.7518, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3994673768308921, |
|
"grad_norm": 20.285459518432617, |
|
"learning_rate": 2.92e-06, |
|
"loss": 1.3976, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46604527296937415, |
|
"grad_norm": 19.031597137451172, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 1.2585, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5326231691078562, |
|
"grad_norm": 22.678789138793945, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 1.1602, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5326231691078562, |
|
"eval_loss": 0.5182062983512878, |
|
"eval_runtime": 169.8362, |
|
"eval_samples_per_second": 2.096, |
|
"eval_steps_per_second": 0.524, |
|
"eval_wer": 0.3746898263027295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5992010652463382, |
|
"grad_norm": 19.633346557617188, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.976, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6657789613848203, |
|
"grad_norm": 22.2548828125, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.9464, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7323568575233023, |
|
"grad_norm": 19.41946029663086, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 0.8527, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7989347536617842, |
|
"grad_norm": 20.135112762451172, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.8483, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8655126498002663, |
|
"grad_norm": 17.5079345703125, |
|
"learning_rate": 6.42e-06, |
|
"loss": 0.9001, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9320905459387483, |
|
"grad_norm": 21.997474670410156, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.761, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9986684420772304, |
|
"grad_norm": 20.054590225219727, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 0.7337, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0639147802929427, |
|
"grad_norm": 16.24369239807129, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.5456, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0639147802929427, |
|
"eval_loss": 0.3444797098636627, |
|
"eval_runtime": 166.2672, |
|
"eval_samples_per_second": 2.141, |
|
"eval_steps_per_second": 0.535, |
|
"eval_wer": 0.25516956162117455, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1304926764314247, |
|
"grad_norm": 17.575782775878906, |
|
"learning_rate": 8.42e-06, |
|
"loss": 0.4736, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1970705725699067, |
|
"grad_norm": 12.86981201171875, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2636484687083889, |
|
"grad_norm": 15.951074600219727, |
|
"learning_rate": 9.42e-06, |
|
"loss": 0.5233, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.3302263648468708, |
|
"grad_norm": 18.161441802978516, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.5483, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3968042609853528, |
|
"grad_norm": 13.9922513961792, |
|
"learning_rate": 9.953333333333333e-06, |
|
"loss": 0.5045, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.463382157123835, |
|
"grad_norm": 19.316190719604492, |
|
"learning_rate": 9.89777777777778e-06, |
|
"loss": 0.4916, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5299600532623168, |
|
"grad_norm": 14.279507637023926, |
|
"learning_rate": 9.842222222222223e-06, |
|
"loss": 0.5007, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.596537949400799, |
|
"grad_norm": 15.150153160095215, |
|
"learning_rate": 9.786666666666667e-06, |
|
"loss": 0.5516, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.596537949400799, |
|
"eval_loss": 0.29034528136253357, |
|
"eval_runtime": 169.9132, |
|
"eval_samples_per_second": 2.095, |
|
"eval_steps_per_second": 0.524, |
|
"eval_wer": 0.2413151364764268, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.663115845539281, |
|
"grad_norm": 11.70125675201416, |
|
"learning_rate": 9.731111111111113e-06, |
|
"loss": 0.4788, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.729693741677763, |
|
"grad_norm": 12.724228858947754, |
|
"learning_rate": 9.675555555555555e-06, |
|
"loss": 0.4391, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.796271637816245, |
|
"grad_norm": 13.262675285339355, |
|
"learning_rate": 9.620000000000001e-06, |
|
"loss": 0.5023, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.862849533954727, |
|
"grad_norm": 15.492055892944336, |
|
"learning_rate": 9.564444444444445e-06, |
|
"loss": 0.493, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.929427430093209, |
|
"grad_norm": 13.981544494628906, |
|
"learning_rate": 9.508888888888889e-06, |
|
"loss": 0.4262, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.996005326231691, |
|
"grad_norm": 10.144460678100586, |
|
"learning_rate": 9.453333333333335e-06, |
|
"loss": 0.4629, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.0612516644474033, |
|
"grad_norm": 13.497632026672363, |
|
"learning_rate": 9.397777777777779e-06, |
|
"loss": 0.2155, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.1278295605858855, |
|
"grad_norm": 7.0717878341674805, |
|
"learning_rate": 9.342222222222223e-06, |
|
"loss": 0.224, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1278295605858855, |
|
"eval_loss": 0.2817358374595642, |
|
"eval_runtime": 174.0748, |
|
"eval_samples_per_second": 2.045, |
|
"eval_steps_per_second": 0.511, |
|
"eval_wer": 0.23842018196856907, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1944074567243677, |
|
"grad_norm": 6.820193767547607, |
|
"learning_rate": 9.286666666666667e-06, |
|
"loss": 0.241, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.2609853528628494, |
|
"grad_norm": 8.093194007873535, |
|
"learning_rate": 9.231111111111111e-06, |
|
"loss": 0.2301, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3275632490013316, |
|
"grad_norm": 6.863702774047852, |
|
"learning_rate": 9.175555555555557e-06, |
|
"loss": 0.2417, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.3941411451398134, |
|
"grad_norm": 8.645722389221191, |
|
"learning_rate": 9.12e-06, |
|
"loss": 0.2456, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4607190412782955, |
|
"grad_norm": 6.348605632781982, |
|
"learning_rate": 9.064444444444447e-06, |
|
"loss": 0.2535, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5272969374167777, |
|
"grad_norm": 12.011576652526855, |
|
"learning_rate": 9.008888888888889e-06, |
|
"loss": 0.241, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.5938748335552595, |
|
"grad_norm": 8.227922439575195, |
|
"learning_rate": 8.953333333333335e-06, |
|
"loss": 0.1779, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6604527296937417, |
|
"grad_norm": 10.178849220275879, |
|
"learning_rate": 8.897777777777779e-06, |
|
"loss": 0.2413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6604527296937417, |
|
"eval_loss": 0.25605687499046326, |
|
"eval_runtime": 166.4206, |
|
"eval_samples_per_second": 2.139, |
|
"eval_steps_per_second": 0.535, |
|
"eval_wer": 0.19520264681555005, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7270306258322234, |
|
"grad_norm": 8.071171760559082, |
|
"learning_rate": 8.842222222222223e-06, |
|
"loss": 0.1885, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.7936085219707056, |
|
"grad_norm": 10.7284574508667, |
|
"learning_rate": 8.786666666666668e-06, |
|
"loss": 0.2392, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.860186418109188, |
|
"grad_norm": 6.85145902633667, |
|
"learning_rate": 8.73111111111111e-06, |
|
"loss": 0.2225, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.92676431424767, |
|
"grad_norm": 11.033931732177734, |
|
"learning_rate": 8.675555555555556e-06, |
|
"loss": 0.2241, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.993342210386152, |
|
"grad_norm": 7.744363307952881, |
|
"learning_rate": 8.62e-06, |
|
"loss": 0.1949, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.0585885486018642, |
|
"grad_norm": 7.1467084884643555, |
|
"learning_rate": 8.564444444444445e-06, |
|
"loss": 0.1003, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.125166444740346, |
|
"grad_norm": 4.110116004943848, |
|
"learning_rate": 8.50888888888889e-06, |
|
"loss": 0.0878, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.191744340878828, |
|
"grad_norm": 4.550497531890869, |
|
"learning_rate": 8.453333333333334e-06, |
|
"loss": 0.1036, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.191744340878828, |
|
"eval_loss": 0.2583100497722626, |
|
"eval_runtime": 166.1164, |
|
"eval_samples_per_second": 2.143, |
|
"eval_steps_per_second": 0.536, |
|
"eval_wer": 0.19044665012406947, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.2583222370173104, |
|
"grad_norm": 5.678071975708008, |
|
"learning_rate": 8.397777777777778e-06, |
|
"loss": 0.0985, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 3.324900133155792, |
|
"grad_norm": 6.956425189971924, |
|
"learning_rate": 8.342222222222222e-06, |
|
"loss": 0.0963, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.3914780292942743, |
|
"grad_norm": 5.823480606079102, |
|
"learning_rate": 8.286666666666668e-06, |
|
"loss": 0.1072, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.458055925432756, |
|
"grad_norm": 3.2603001594543457, |
|
"learning_rate": 8.231111111111112e-06, |
|
"loss": 0.0844, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.5246338215712383, |
|
"grad_norm": 3.718132257461548, |
|
"learning_rate": 8.175555555555556e-06, |
|
"loss": 0.1595, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.5912117177097205, |
|
"grad_norm": 7.773962497711182, |
|
"learning_rate": 8.120000000000002e-06, |
|
"loss": 0.1304, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.6577896138482027, |
|
"grad_norm": 8.074212074279785, |
|
"learning_rate": 8.064444444444444e-06, |
|
"loss": 0.0951, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 3.7243675099866844, |
|
"grad_norm": 3.417116165161133, |
|
"learning_rate": 8.00888888888889e-06, |
|
"loss": 0.1135, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.7243675099866844, |
|
"eval_loss": 0.2636599540710449, |
|
"eval_runtime": 173.0821, |
|
"eval_samples_per_second": 2.057, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.2119520264681555, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.790945406125166, |
|
"grad_norm": 3.8932766914367676, |
|
"learning_rate": 7.953333333333334e-06, |
|
"loss": 0.1156, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 3.8575233022636484, |
|
"grad_norm": 5.214465618133545, |
|
"learning_rate": 7.897777777777778e-06, |
|
"loss": 0.1334, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.9241011984021306, |
|
"grad_norm": 4.3109331130981445, |
|
"learning_rate": 7.842222222222224e-06, |
|
"loss": 0.1106, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 3.9906790945406128, |
|
"grad_norm": 5.095810413360596, |
|
"learning_rate": 7.786666666666666e-06, |
|
"loss": 0.1038, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.055925432756325, |
|
"grad_norm": 3.1624755859375, |
|
"learning_rate": 7.731111111111112e-06, |
|
"loss": 0.0806, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 4.1225033288948065, |
|
"grad_norm": 6.265628814697266, |
|
"learning_rate": 7.675555555555556e-06, |
|
"loss": 0.0626, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.189081225033289, |
|
"grad_norm": 3.3597617149353027, |
|
"learning_rate": 7.620000000000001e-06, |
|
"loss": 0.0594, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 4.255659121171771, |
|
"grad_norm": 6.631519794464111, |
|
"learning_rate": 7.564444444444446e-06, |
|
"loss": 0.057, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.255659121171771, |
|
"eval_loss": 0.273118257522583, |
|
"eval_runtime": 172.5497, |
|
"eval_samples_per_second": 2.063, |
|
"eval_steps_per_second": 0.516, |
|
"eval_wer": 0.20967741935483872, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.255659121171771, |
|
"step": 1600, |
|
"total_flos": 1.303516587884544e+19, |
|
"train_loss": 0.610494866669178, |
|
"train_runtime": 3998.2703, |
|
"train_samples_per_second": 10.004, |
|
"train_steps_per_second": 1.251 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 14, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.303516587884544e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|