{
"best_metric": 0.25605687499046326,
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-nyagen-combined-model/checkpoint-1000",
"epoch": 4.255659121171771,
"eval_steps": 200,
"global_step": 1600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06657789613848203,
"grad_norm": 56.093265533447266,
"learning_rate": 4.2000000000000006e-07,
"loss": 5.267,
"step": 25
},
{
"epoch": 0.13315579227696406,
"grad_norm": 34.71885681152344,
"learning_rate": 9.200000000000001e-07,
"loss": 4.1657,
"step": 50
},
{
"epoch": 0.19973368841544606,
"grad_norm": 31.17400360107422,
"learning_rate": 1.42e-06,
"loss": 3.1786,
"step": 75
},
{
"epoch": 0.2663115845539281,
"grad_norm": 30.04043960571289,
"learning_rate": 1.9200000000000003e-06,
"loss": 2.1171,
"step": 100
},
{
"epoch": 0.33288948069241014,
"grad_norm": 21.41140365600586,
"learning_rate": 2.42e-06,
"loss": 1.7518,
"step": 125
},
{
"epoch": 0.3994673768308921,
"grad_norm": 20.285459518432617,
"learning_rate": 2.92e-06,
"loss": 1.3976,
"step": 150
},
{
"epoch": 0.46604527296937415,
"grad_norm": 19.031597137451172,
"learning_rate": 3.4200000000000007e-06,
"loss": 1.2585,
"step": 175
},
{
"epoch": 0.5326231691078562,
"grad_norm": 22.678789138793945,
"learning_rate": 3.920000000000001e-06,
"loss": 1.1602,
"step": 200
},
{
"epoch": 0.5326231691078562,
"eval_loss": 0.5182062983512878,
"eval_runtime": 169.8362,
"eval_samples_per_second": 2.096,
"eval_steps_per_second": 0.524,
"eval_wer": 0.3746898263027295,
"step": 200
},
{
"epoch": 0.5992010652463382,
"grad_norm": 19.633346557617188,
"learning_rate": 4.42e-06,
"loss": 0.976,
"step": 225
},
{
"epoch": 0.6657789613848203,
"grad_norm": 22.2548828125,
"learning_rate": 4.92e-06,
"loss": 0.9464,
"step": 250
},
{
"epoch": 0.7323568575233023,
"grad_norm": 19.41946029663086,
"learning_rate": 5.420000000000001e-06,
"loss": 0.8527,
"step": 275
},
{
"epoch": 0.7989347536617842,
"grad_norm": 20.135112762451172,
"learning_rate": 5.92e-06,
"loss": 0.8483,
"step": 300
},
{
"epoch": 0.8655126498002663,
"grad_norm": 17.5079345703125,
"learning_rate": 6.42e-06,
"loss": 0.9001,
"step": 325
},
{
"epoch": 0.9320905459387483,
"grad_norm": 21.997474670410156,
"learning_rate": 6.92e-06,
"loss": 0.761,
"step": 350
},
{
"epoch": 0.9986684420772304,
"grad_norm": 20.054590225219727,
"learning_rate": 7.420000000000001e-06,
"loss": 0.7337,
"step": 375
},
{
"epoch": 1.0639147802929427,
"grad_norm": 16.24369239807129,
"learning_rate": 7.92e-06,
"loss": 0.5456,
"step": 400
},
{
"epoch": 1.0639147802929427,
"eval_loss": 0.3444797098636627,
"eval_runtime": 166.2672,
"eval_samples_per_second": 2.141,
"eval_steps_per_second": 0.535,
"eval_wer": 0.25516956162117455,
"step": 400
},
{
"epoch": 1.1304926764314247,
"grad_norm": 17.575782775878906,
"learning_rate": 8.42e-06,
"loss": 0.4736,
"step": 425
},
{
"epoch": 1.1970705725699067,
"grad_norm": 12.86981201171875,
"learning_rate": 8.920000000000001e-06,
"loss": 0.535,
"step": 450
},
{
"epoch": 1.2636484687083889,
"grad_norm": 15.951074600219727,
"learning_rate": 9.42e-06,
"loss": 0.5233,
"step": 475
},
{
"epoch": 1.3302263648468708,
"grad_norm": 18.161441802978516,
"learning_rate": 9.920000000000002e-06,
"loss": 0.5483,
"step": 500
},
{
"epoch": 1.3968042609853528,
"grad_norm": 13.9922513961792,
"learning_rate": 9.953333333333333e-06,
"loss": 0.5045,
"step": 525
},
{
"epoch": 1.463382157123835,
"grad_norm": 19.316190719604492,
"learning_rate": 9.89777777777778e-06,
"loss": 0.4916,
"step": 550
},
{
"epoch": 1.5299600532623168,
"grad_norm": 14.279507637023926,
"learning_rate": 9.842222222222223e-06,
"loss": 0.5007,
"step": 575
},
{
"epoch": 1.596537949400799,
"grad_norm": 15.150153160095215,
"learning_rate": 9.786666666666667e-06,
"loss": 0.5516,
"step": 600
},
{
"epoch": 1.596537949400799,
"eval_loss": 0.29034528136253357,
"eval_runtime": 169.9132,
"eval_samples_per_second": 2.095,
"eval_steps_per_second": 0.524,
"eval_wer": 0.2413151364764268,
"step": 600
},
{
"epoch": 1.663115845539281,
"grad_norm": 11.70125675201416,
"learning_rate": 9.731111111111113e-06,
"loss": 0.4788,
"step": 625
},
{
"epoch": 1.729693741677763,
"grad_norm": 12.724228858947754,
"learning_rate": 9.675555555555555e-06,
"loss": 0.4391,
"step": 650
},
{
"epoch": 1.796271637816245,
"grad_norm": 13.262675285339355,
"learning_rate": 9.620000000000001e-06,
"loss": 0.5023,
"step": 675
},
{
"epoch": 1.862849533954727,
"grad_norm": 15.492055892944336,
"learning_rate": 9.564444444444445e-06,
"loss": 0.493,
"step": 700
},
{
"epoch": 1.929427430093209,
"grad_norm": 13.981544494628906,
"learning_rate": 9.508888888888889e-06,
"loss": 0.4262,
"step": 725
},
{
"epoch": 1.996005326231691,
"grad_norm": 10.144460678100586,
"learning_rate": 9.453333333333335e-06,
"loss": 0.4629,
"step": 750
},
{
"epoch": 2.0612516644474033,
"grad_norm": 13.497632026672363,
"learning_rate": 9.397777777777779e-06,
"loss": 0.2155,
"step": 775
},
{
"epoch": 2.1278295605858855,
"grad_norm": 7.0717878341674805,
"learning_rate": 9.342222222222223e-06,
"loss": 0.224,
"step": 800
},
{
"epoch": 2.1278295605858855,
"eval_loss": 0.2817358374595642,
"eval_runtime": 174.0748,
"eval_samples_per_second": 2.045,
"eval_steps_per_second": 0.511,
"eval_wer": 0.23842018196856907,
"step": 800
},
{
"epoch": 2.1944074567243677,
"grad_norm": 6.820193767547607,
"learning_rate": 9.286666666666667e-06,
"loss": 0.241,
"step": 825
},
{
"epoch": 2.2609853528628494,
"grad_norm": 8.093194007873535,
"learning_rate": 9.231111111111111e-06,
"loss": 0.2301,
"step": 850
},
{
"epoch": 2.3275632490013316,
"grad_norm": 6.863702774047852,
"learning_rate": 9.175555555555557e-06,
"loss": 0.2417,
"step": 875
},
{
"epoch": 2.3941411451398134,
"grad_norm": 8.645722389221191,
"learning_rate": 9.12e-06,
"loss": 0.2456,
"step": 900
},
{
"epoch": 2.4607190412782955,
"grad_norm": 6.348605632781982,
"learning_rate": 9.064444444444447e-06,
"loss": 0.2535,
"step": 925
},
{
"epoch": 2.5272969374167777,
"grad_norm": 12.011576652526855,
"learning_rate": 9.008888888888889e-06,
"loss": 0.241,
"step": 950
},
{
"epoch": 2.5938748335552595,
"grad_norm": 8.227922439575195,
"learning_rate": 8.953333333333335e-06,
"loss": 0.1779,
"step": 975
},
{
"epoch": 2.6604527296937417,
"grad_norm": 10.178849220275879,
"learning_rate": 8.897777777777779e-06,
"loss": 0.2413,
"step": 1000
},
{
"epoch": 2.6604527296937417,
"eval_loss": 0.25605687499046326,
"eval_runtime": 166.4206,
"eval_samples_per_second": 2.139,
"eval_steps_per_second": 0.535,
"eval_wer": 0.19520264681555005,
"step": 1000
},
{
"epoch": 2.7270306258322234,
"grad_norm": 8.071171760559082,
"learning_rate": 8.842222222222223e-06,
"loss": 0.1885,
"step": 1025
},
{
"epoch": 2.7936085219707056,
"grad_norm": 10.7284574508667,
"learning_rate": 8.786666666666668e-06,
"loss": 0.2392,
"step": 1050
},
{
"epoch": 2.860186418109188,
"grad_norm": 6.85145902633667,
"learning_rate": 8.73111111111111e-06,
"loss": 0.2225,
"step": 1075
},
{
"epoch": 2.92676431424767,
"grad_norm": 11.033931732177734,
"learning_rate": 8.675555555555556e-06,
"loss": 0.2241,
"step": 1100
},
{
"epoch": 2.993342210386152,
"grad_norm": 7.744363307952881,
"learning_rate": 8.62e-06,
"loss": 0.1949,
"step": 1125
},
{
"epoch": 3.0585885486018642,
"grad_norm": 7.1467084884643555,
"learning_rate": 8.564444444444445e-06,
"loss": 0.1003,
"step": 1150
},
{
"epoch": 3.125166444740346,
"grad_norm": 4.110116004943848,
"learning_rate": 8.50888888888889e-06,
"loss": 0.0878,
"step": 1175
},
{
"epoch": 3.191744340878828,
"grad_norm": 4.550497531890869,
"learning_rate": 8.453333333333334e-06,
"loss": 0.1036,
"step": 1200
},
{
"epoch": 3.191744340878828,
"eval_loss": 0.2583100497722626,
"eval_runtime": 166.1164,
"eval_samples_per_second": 2.143,
"eval_steps_per_second": 0.536,
"eval_wer": 0.19044665012406947,
"step": 1200
},
{
"epoch": 3.2583222370173104,
"grad_norm": 5.678071975708008,
"learning_rate": 8.397777777777778e-06,
"loss": 0.0985,
"step": 1225
},
{
"epoch": 3.324900133155792,
"grad_norm": 6.956425189971924,
"learning_rate": 8.342222222222222e-06,
"loss": 0.0963,
"step": 1250
},
{
"epoch": 3.3914780292942743,
"grad_norm": 5.823480606079102,
"learning_rate": 8.286666666666668e-06,
"loss": 0.1072,
"step": 1275
},
{
"epoch": 3.458055925432756,
"grad_norm": 3.2603001594543457,
"learning_rate": 8.231111111111112e-06,
"loss": 0.0844,
"step": 1300
},
{
"epoch": 3.5246338215712383,
"grad_norm": 3.718132257461548,
"learning_rate": 8.175555555555556e-06,
"loss": 0.1595,
"step": 1325
},
{
"epoch": 3.5912117177097205,
"grad_norm": 7.773962497711182,
"learning_rate": 8.120000000000002e-06,
"loss": 0.1304,
"step": 1350
},
{
"epoch": 3.6577896138482027,
"grad_norm": 8.074212074279785,
"learning_rate": 8.064444444444444e-06,
"loss": 0.0951,
"step": 1375
},
{
"epoch": 3.7243675099866844,
"grad_norm": 3.417116165161133,
"learning_rate": 8.00888888888889e-06,
"loss": 0.1135,
"step": 1400
},
{
"epoch": 3.7243675099866844,
"eval_loss": 0.2636599540710449,
"eval_runtime": 173.0821,
"eval_samples_per_second": 2.057,
"eval_steps_per_second": 0.514,
"eval_wer": 0.2119520264681555,
"step": 1400
},
{
"epoch": 3.790945406125166,
"grad_norm": 3.8932766914367676,
"learning_rate": 7.953333333333334e-06,
"loss": 0.1156,
"step": 1425
},
{
"epoch": 3.8575233022636484,
"grad_norm": 5.214465618133545,
"learning_rate": 7.897777777777778e-06,
"loss": 0.1334,
"step": 1450
},
{
"epoch": 3.9241011984021306,
"grad_norm": 4.3109331130981445,
"learning_rate": 7.842222222222224e-06,
"loss": 0.1106,
"step": 1475
},
{
"epoch": 3.9906790945406128,
"grad_norm": 5.095810413360596,
"learning_rate": 7.786666666666666e-06,
"loss": 0.1038,
"step": 1500
},
{
"epoch": 4.055925432756325,
"grad_norm": 3.1624755859375,
"learning_rate": 7.731111111111112e-06,
"loss": 0.0806,
"step": 1525
},
{
"epoch": 4.1225033288948065,
"grad_norm": 6.265628814697266,
"learning_rate": 7.675555555555556e-06,
"loss": 0.0626,
"step": 1550
},
{
"epoch": 4.189081225033289,
"grad_norm": 3.3597617149353027,
"learning_rate": 7.620000000000001e-06,
"loss": 0.0594,
"step": 1575
},
{
"epoch": 4.255659121171771,
"grad_norm": 6.631519794464111,
"learning_rate": 7.564444444444446e-06,
"loss": 0.057,
"step": 1600
},
{
"epoch": 4.255659121171771,
"eval_loss": 0.273118257522583,
"eval_runtime": 172.5497,
"eval_samples_per_second": 2.063,
"eval_steps_per_second": 0.516,
"eval_wer": 0.20967741935483872,
"step": 1600
},
{
"epoch": 4.255659121171771,
"step": 1600,
"total_flos": 1.303516587884544e+19,
"train_loss": 0.610494866669178,
"train_runtime": 3998.2703,
"train_samples_per_second": 10.004,
"train_steps_per_second": 1.251
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.303516587884544e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}