{ "best_metric": 0.25605687499046326, "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-nyagen-combined-model/checkpoint-1000", "epoch": 4.255659121171771, "eval_steps": 200, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06657789613848203, "grad_norm": 56.093265533447266, "learning_rate": 4.2000000000000006e-07, "loss": 5.267, "step": 25 }, { "epoch": 0.13315579227696406, "grad_norm": 34.71885681152344, "learning_rate": 9.200000000000001e-07, "loss": 4.1657, "step": 50 }, { "epoch": 0.19973368841544606, "grad_norm": 31.17400360107422, "learning_rate": 1.42e-06, "loss": 3.1786, "step": 75 }, { "epoch": 0.2663115845539281, "grad_norm": 30.04043960571289, "learning_rate": 1.9200000000000003e-06, "loss": 2.1171, "step": 100 }, { "epoch": 0.33288948069241014, "grad_norm": 21.41140365600586, "learning_rate": 2.42e-06, "loss": 1.7518, "step": 125 }, { "epoch": 0.3994673768308921, "grad_norm": 20.285459518432617, "learning_rate": 2.92e-06, "loss": 1.3976, "step": 150 }, { "epoch": 0.46604527296937415, "grad_norm": 19.031597137451172, "learning_rate": 3.4200000000000007e-06, "loss": 1.2585, "step": 175 }, { "epoch": 0.5326231691078562, "grad_norm": 22.678789138793945, "learning_rate": 3.920000000000001e-06, "loss": 1.1602, "step": 200 }, { "epoch": 0.5326231691078562, "eval_loss": 0.5182062983512878, "eval_runtime": 169.8362, "eval_samples_per_second": 2.096, "eval_steps_per_second": 0.524, "eval_wer": 0.3746898263027295, "step": 200 }, { "epoch": 0.5992010652463382, "grad_norm": 19.633346557617188, "learning_rate": 4.42e-06, "loss": 0.976, "step": 225 }, { "epoch": 0.6657789613848203, "grad_norm": 22.2548828125, "learning_rate": 4.92e-06, "loss": 0.9464, "step": 250 }, { "epoch": 0.7323568575233023, "grad_norm": 19.41946029663086, "learning_rate": 5.420000000000001e-06, "loss": 0.8527, "step": 275 }, { "epoch": 0.7989347536617842, "grad_norm": 20.135112762451172, "learning_rate": 5.92e-06, "loss": 0.8483, "step": 300 }, { "epoch": 0.8655126498002663, "grad_norm": 17.5079345703125, "learning_rate": 6.42e-06, "loss": 0.9001, "step": 325 }, { "epoch": 0.9320905459387483, "grad_norm": 21.997474670410156, "learning_rate": 6.92e-06, "loss": 0.761, "step": 350 }, { "epoch": 0.9986684420772304, "grad_norm": 20.054590225219727, "learning_rate": 7.420000000000001e-06, "loss": 0.7337, "step": 375 }, { "epoch": 1.0639147802929427, "grad_norm": 16.24369239807129, "learning_rate": 7.92e-06, "loss": 0.5456, "step": 400 }, { "epoch": 1.0639147802929427, "eval_loss": 0.3444797098636627, "eval_runtime": 166.2672, "eval_samples_per_second": 2.141, "eval_steps_per_second": 0.535, "eval_wer": 0.25516956162117455, "step": 400 }, { "epoch": 1.1304926764314247, "grad_norm": 17.575782775878906, "learning_rate": 8.42e-06, "loss": 0.4736, "step": 425 }, { "epoch": 1.1970705725699067, "grad_norm": 12.86981201171875, "learning_rate": 8.920000000000001e-06, "loss": 0.535, "step": 450 }, { "epoch": 1.2636484687083889, "grad_norm": 15.951074600219727, "learning_rate": 9.42e-06, "loss": 0.5233, "step": 475 }, { "epoch": 1.3302263648468708, "grad_norm": 18.161441802978516, "learning_rate": 9.920000000000002e-06, "loss": 0.5483, "step": 500 }, { "epoch": 1.3968042609853528, "grad_norm": 13.9922513961792, "learning_rate": 9.953333333333333e-06, "loss": 0.5045, "step": 525 }, { "epoch": 1.463382157123835, "grad_norm": 19.316190719604492, "learning_rate": 9.89777777777778e-06, "loss": 0.4916, "step": 550 }, { 
"epoch": 1.5299600532623168, "grad_norm": 14.279507637023926, "learning_rate": 9.842222222222223e-06, "loss": 0.5007, "step": 575 }, { "epoch": 1.596537949400799, "grad_norm": 15.150153160095215, "learning_rate": 9.786666666666667e-06, "loss": 0.5516, "step": 600 }, { "epoch": 1.596537949400799, "eval_loss": 0.29034528136253357, "eval_runtime": 169.9132, "eval_samples_per_second": 2.095, "eval_steps_per_second": 0.524, "eval_wer": 0.2413151364764268, "step": 600 }, { "epoch": 1.663115845539281, "grad_norm": 11.70125675201416, "learning_rate": 9.731111111111113e-06, "loss": 0.4788, "step": 625 }, { "epoch": 1.729693741677763, "grad_norm": 12.724228858947754, "learning_rate": 9.675555555555555e-06, "loss": 0.4391, "step": 650 }, { "epoch": 1.796271637816245, "grad_norm": 13.262675285339355, "learning_rate": 9.620000000000001e-06, "loss": 0.5023, "step": 675 }, { "epoch": 1.862849533954727, "grad_norm": 15.492055892944336, "learning_rate": 9.564444444444445e-06, "loss": 0.493, "step": 700 }, { "epoch": 1.929427430093209, "grad_norm": 13.981544494628906, "learning_rate": 9.508888888888889e-06, "loss": 0.4262, "step": 725 }, { "epoch": 1.996005326231691, "grad_norm": 10.144460678100586, "learning_rate": 9.453333333333335e-06, "loss": 0.4629, "step": 750 }, { "epoch": 2.0612516644474033, "grad_norm": 13.497632026672363, "learning_rate": 9.397777777777779e-06, "loss": 0.2155, "step": 775 }, { "epoch": 2.1278295605858855, "grad_norm": 7.0717878341674805, "learning_rate": 9.342222222222223e-06, "loss": 0.224, "step": 800 }, { "epoch": 2.1278295605858855, "eval_loss": 0.2817358374595642, "eval_runtime": 174.0748, "eval_samples_per_second": 2.045, "eval_steps_per_second": 0.511, "eval_wer": 0.23842018196856907, "step": 800 }, { "epoch": 2.1944074567243677, "grad_norm": 6.820193767547607, "learning_rate": 9.286666666666667e-06, "loss": 0.241, "step": 825 }, { "epoch": 2.2609853528628494, "grad_norm": 8.093194007873535, "learning_rate": 9.231111111111111e-06, "loss": 0.2301, "step": 850 }, { "epoch": 2.3275632490013316, "grad_norm": 6.863702774047852, "learning_rate": 9.175555555555557e-06, "loss": 0.2417, "step": 875 }, { "epoch": 2.3941411451398134, "grad_norm": 8.645722389221191, "learning_rate": 9.12e-06, "loss": 0.2456, "step": 900 }, { "epoch": 2.4607190412782955, "grad_norm": 6.348605632781982, "learning_rate": 9.064444444444447e-06, "loss": 0.2535, "step": 925 }, { "epoch": 2.5272969374167777, "grad_norm": 12.011576652526855, "learning_rate": 9.008888888888889e-06, "loss": 0.241, "step": 950 }, { "epoch": 2.5938748335552595, "grad_norm": 8.227922439575195, "learning_rate": 8.953333333333335e-06, "loss": 0.1779, "step": 975 }, { "epoch": 2.6604527296937417, "grad_norm": 10.178849220275879, "learning_rate": 8.897777777777779e-06, "loss": 0.2413, "step": 1000 }, { "epoch": 2.6604527296937417, "eval_loss": 0.25605687499046326, "eval_runtime": 166.4206, "eval_samples_per_second": 2.139, "eval_steps_per_second": 0.535, "eval_wer": 0.19520264681555005, "step": 1000 }, { "epoch": 2.7270306258322234, "grad_norm": 8.071171760559082, "learning_rate": 8.842222222222223e-06, "loss": 0.1885, "step": 1025 }, { "epoch": 2.7936085219707056, "grad_norm": 10.7284574508667, "learning_rate": 8.786666666666668e-06, "loss": 0.2392, "step": 1050 }, { "epoch": 2.860186418109188, "grad_norm": 6.85145902633667, "learning_rate": 8.73111111111111e-06, "loss": 0.2225, "step": 1075 }, { "epoch": 2.92676431424767, "grad_norm": 11.033931732177734, "learning_rate": 8.675555555555556e-06, "loss": 0.2241, "step": 1100 }, { 
"epoch": 2.993342210386152, "grad_norm": 7.744363307952881, "learning_rate": 8.62e-06, "loss": 0.1949, "step": 1125 }, { "epoch": 3.0585885486018642, "grad_norm": 7.1467084884643555, "learning_rate": 8.564444444444445e-06, "loss": 0.1003, "step": 1150 }, { "epoch": 3.125166444740346, "grad_norm": 4.110116004943848, "learning_rate": 8.50888888888889e-06, "loss": 0.0878, "step": 1175 }, { "epoch": 3.191744340878828, "grad_norm": 4.550497531890869, "learning_rate": 8.453333333333334e-06, "loss": 0.1036, "step": 1200 }, { "epoch": 3.191744340878828, "eval_loss": 0.2583100497722626, "eval_runtime": 166.1164, "eval_samples_per_second": 2.143, "eval_steps_per_second": 0.536, "eval_wer": 0.19044665012406947, "step": 1200 }, { "epoch": 3.2583222370173104, "grad_norm": 5.678071975708008, "learning_rate": 8.397777777777778e-06, "loss": 0.0985, "step": 1225 }, { "epoch": 3.324900133155792, "grad_norm": 6.956425189971924, "learning_rate": 8.342222222222222e-06, "loss": 0.0963, "step": 1250 }, { "epoch": 3.3914780292942743, "grad_norm": 5.823480606079102, "learning_rate": 8.286666666666668e-06, "loss": 0.1072, "step": 1275 }, { "epoch": 3.458055925432756, "grad_norm": 3.2603001594543457, "learning_rate": 8.231111111111112e-06, "loss": 0.0844, "step": 1300 }, { "epoch": 3.5246338215712383, "grad_norm": 3.718132257461548, "learning_rate": 8.175555555555556e-06, "loss": 0.1595, "step": 1325 }, { "epoch": 3.5912117177097205, "grad_norm": 7.773962497711182, "learning_rate": 8.120000000000002e-06, "loss": 0.1304, "step": 1350 }, { "epoch": 3.6577896138482027, "grad_norm": 8.074212074279785, "learning_rate": 8.064444444444444e-06, "loss": 0.0951, "step": 1375 }, { "epoch": 3.7243675099866844, "grad_norm": 3.417116165161133, "learning_rate": 8.00888888888889e-06, "loss": 0.1135, "step": 1400 }, { "epoch": 3.7243675099866844, "eval_loss": 0.2636599540710449, "eval_runtime": 173.0821, "eval_samples_per_second": 2.057, "eval_steps_per_second": 0.514, "eval_wer": 0.2119520264681555, "step": 1400 }, { "epoch": 3.790945406125166, "grad_norm": 3.8932766914367676, "learning_rate": 7.953333333333334e-06, "loss": 0.1156, "step": 1425 }, { "epoch": 3.8575233022636484, "grad_norm": 5.214465618133545, "learning_rate": 7.897777777777778e-06, "loss": 0.1334, "step": 1450 }, { "epoch": 3.9241011984021306, "grad_norm": 4.3109331130981445, "learning_rate": 7.842222222222224e-06, "loss": 0.1106, "step": 1475 }, { "epoch": 3.9906790945406128, "grad_norm": 5.095810413360596, "learning_rate": 7.786666666666666e-06, "loss": 0.1038, "step": 1500 }, { "epoch": 4.055925432756325, "grad_norm": 3.1624755859375, "learning_rate": 7.731111111111112e-06, "loss": 0.0806, "step": 1525 }, { "epoch": 4.1225033288948065, "grad_norm": 6.265628814697266, "learning_rate": 7.675555555555556e-06, "loss": 0.0626, "step": 1550 }, { "epoch": 4.189081225033289, "grad_norm": 3.3597617149353027, "learning_rate": 7.620000000000001e-06, "loss": 0.0594, "step": 1575 }, { "epoch": 4.255659121171771, "grad_norm": 6.631519794464111, "learning_rate": 7.564444444444446e-06, "loss": 0.057, "step": 1600 }, { "epoch": 4.255659121171771, "eval_loss": 0.273118257522583, "eval_runtime": 172.5497, "eval_samples_per_second": 2.063, "eval_steps_per_second": 0.516, "eval_wer": 0.20967741935483872, "step": 1600 }, { "epoch": 4.255659121171771, "step": 1600, "total_flos": 1.303516587884544e+19, "train_loss": 0.610494866669178, "train_runtime": 3998.2703, "train_samples_per_second": 10.004, "train_steps_per_second": 1.251 } ], "logging_steps": 25, "max_steps": 5000, 
"num_input_tokens_seen": 0, "num_train_epochs": 14, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.303516587884544e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }