{ "best_metric": 96.39777735198314, "best_model_checkpoint": "./whisper-small-ha-v3/checkpoint-1000", "epoch": 12.738853503184714, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1592356687898089, "grad_norm": 202.8230438232422, "learning_rate": 0.0002, "loss": 3.0913, "step": 25 }, { "epoch": 0.3184713375796178, "grad_norm": 20.680511474609375, "learning_rate": 0.00045000000000000004, "loss": 2.931, "step": 50 }, { "epoch": 0.47770700636942676, "grad_norm": 47.82948303222656, "learning_rate": 0.0005, "loss": 3.5464, "step": 75 }, { "epoch": 0.6369426751592356, "grad_norm": 10.464394569396973, "learning_rate": 0.0005, "loss": 5.0146, "step": 100 }, { "epoch": 0.7961783439490446, "grad_norm": 9.31248664855957, "learning_rate": 0.0005, "loss": 3.5674, "step": 125 }, { "epoch": 0.9554140127388535, "grad_norm": 8.952727317810059, "learning_rate": 0.0005, "loss": 3.5191, "step": 150 }, { "epoch": 1.1146496815286624, "grad_norm": 8.769633293151855, "learning_rate": 0.0005, "loss": 2.9062, "step": 175 }, { "epoch": 1.2738853503184713, "grad_norm": 8.326931953430176, "learning_rate": 0.0005, "loss": 2.7467, "step": 200 }, { "epoch": 1.4331210191082802, "grad_norm": 8.937690734863281, "learning_rate": 0.0005, "loss": 2.8934, "step": 225 }, { "epoch": 1.5923566878980893, "grad_norm": 7.839733600616455, "learning_rate": 0.0005, "loss": 2.9366, "step": 250 }, { "epoch": 1.7515923566878981, "grad_norm": 7.158330917358398, "learning_rate": 0.0005, "loss": 2.8352, "step": 275 }, { "epoch": 1.910828025477707, "grad_norm": 7.30534029006958, "learning_rate": 0.0005, "loss": 2.9709, "step": 300 }, { "epoch": 2.070063694267516, "grad_norm": 7.713191509246826, "learning_rate": 0.0005, "loss": 2.4781, "step": 325 }, { "epoch": 2.229299363057325, "grad_norm": 7.518861293792725, "learning_rate": 0.0005, "loss": 2.0545, "step": 350 }, { "epoch": 2.388535031847134, "grad_norm": 8.62714958190918, "learning_rate": 0.0005, "loss": 2.1755, "step": 375 }, { "epoch": 2.5477707006369426, "grad_norm": 8.683177947998047, "learning_rate": 0.0005, "loss": 2.2715, "step": 400 }, { "epoch": 2.7070063694267517, "grad_norm": 7.4501142501831055, "learning_rate": 0.0005, "loss": 2.3387, "step": 425 }, { "epoch": 2.8662420382165603, "grad_norm": 8.682276725769043, "learning_rate": 0.0005, "loss": 2.3904, "step": 450 }, { "epoch": 3.0254777070063694, "grad_norm": 7.98887825012207, "learning_rate": 0.0005, "loss": 2.192, "step": 475 }, { "epoch": 3.1847133757961785, "grad_norm": 8.835172653198242, "learning_rate": 0.0005, "loss": 1.5826, "step": 500 }, { "epoch": 3.1847133757961785, "eval_loss": 3.9010848999023438, "eval_runtime": 343.8775, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.122, "eval_wer": 180.03448936577888, "eval_wer_ortho": 183.0078125, "step": 500 }, { "epoch": 3.343949044585987, "grad_norm": 7.396618843078613, "learning_rate": 0.0005, "loss": 1.6942, "step": 525 }, { "epoch": 3.5031847133757963, "grad_norm": 7.276708602905273, "learning_rate": 0.0005, "loss": 1.7758, "step": 550 }, { "epoch": 3.662420382165605, "grad_norm": 8.082304000854492, "learning_rate": 0.0005, "loss": 1.8561, "step": 575 }, { "epoch": 3.821656050955414, "grad_norm": 7.295908451080322, "learning_rate": 0.0005, "loss": 1.9064, "step": 600 }, { "epoch": 3.980891719745223, "grad_norm": 7.9326491355896, "learning_rate": 0.0005, "loss": 1.9251, "step": 625 }, { "epoch": 4.140127388535032, "grad_norm": 7.216257095336914, "learning_rate": 0.0005, "loss": 1.3606, "step": 650 }, { "epoch": 4.2993630573248405, "grad_norm": 6.987307548522949, "learning_rate": 0.0005, "loss": 1.382, "step": 675 }, { "epoch": 4.45859872611465, "grad_norm": 8.181788444519043, "learning_rate": 0.0005, "loss": 1.4235, "step": 700 }, { "epoch": 4.617834394904459, "grad_norm": 7.226937294006348, "learning_rate": 0.0005, "loss": 1.5352, "step": 725 }, { "epoch": 4.777070063694268, "grad_norm": 7.664785385131836, "learning_rate": 0.0005, "loss": 1.5803, "step": 750 }, { "epoch": 4.936305732484076, "grad_norm": 8.353466987609863, "learning_rate": 0.0005, "loss": 1.6793, "step": 775 }, { "epoch": 5.095541401273885, "grad_norm": 7.170167922973633, "learning_rate": 0.0005, "loss": 1.3162, "step": 800 }, { "epoch": 5.254777070063694, "grad_norm": 7.019118309020996, "learning_rate": 0.0005, "loss": 1.1988, "step": 825 }, { "epoch": 5.414012738853503, "grad_norm": 6.39375638961792, "learning_rate": 0.0005, "loss": 1.2476, "step": 850 }, { "epoch": 5.573248407643312, "grad_norm": 7.22137451171875, "learning_rate": 0.0005, "loss": 1.325, "step": 875 }, { "epoch": 5.732484076433121, "grad_norm": 6.7961883544921875, "learning_rate": 0.0005, "loss": 1.3379, "step": 900 }, { "epoch": 5.89171974522293, "grad_norm": 7.7992377281188965, "learning_rate": 0.0005, "loss": 1.3845, "step": 925 }, { "epoch": 6.050955414012739, "grad_norm": 6.209515571594238, "learning_rate": 0.0005, "loss": 1.2692, "step": 950 }, { "epoch": 6.210191082802548, "grad_norm": 6.86682653427124, "learning_rate": 0.0005, "loss": 1.1113, "step": 975 }, { "epoch": 6.369426751592357, "grad_norm": 6.647078037261963, "learning_rate": 0.0005, "loss": 1.1378, "step": 1000 }, { "epoch": 6.369426751592357, "eval_loss": 4.400506019592285, "eval_runtime": 280.0441, "eval_samples_per_second": 2.357, "eval_steps_per_second": 0.15, "eval_wer": 96.39777735198314, "eval_wer_ortho": 96.640625, "step": 1000 }, { "epoch": 6.528662420382165, "grad_norm": 7.238087177276611, "learning_rate": 0.0005, "loss": 1.2028, "step": 1025 }, { "epoch": 6.687898089171974, "grad_norm": 6.603641033172607, "learning_rate": 0.0005, "loss": 1.2026, "step": 1050 }, { "epoch": 6.8471337579617835, "grad_norm": 6.680185794830322, "learning_rate": 0.0005, "loss": 1.2335, "step": 1075 }, { "epoch": 7.006369426751593, "grad_norm": 5.673605442047119, "learning_rate": 0.0005, "loss": 1.2663, "step": 1100 }, { "epoch": 7.165605095541402, "grad_norm": 6.584240913391113, "learning_rate": 0.0005, "loss": 0.9781, "step": 1125 }, { "epoch": 7.32484076433121, "grad_norm": 6.363912582397461, "learning_rate": 0.0005, "loss": 1.04, "step": 1150 }, { "epoch": 7.484076433121019, "grad_norm": 5.395627021789551, "learning_rate": 0.0005, "loss": 1.1184, "step": 1175 }, { "epoch": 7.643312101910828, "grad_norm": 6.268621921539307, "learning_rate": 0.0005, "loss": 1.1184, "step": 1200 }, { "epoch": 7.802547770700637, "grad_norm": 6.605058193206787, "learning_rate": 0.0005, "loss": 1.1101, "step": 1225 }, { "epoch": 7.961783439490446, "grad_norm": 6.293459892272949, "learning_rate": 0.0005, "loss": 1.1396, "step": 1250 }, { "epoch": 8.121019108280255, "grad_norm": 6.258079528808594, "learning_rate": 0.0005, "loss": 0.9623, "step": 1275 }, { "epoch": 8.280254777070065, "grad_norm": 5.724878787994385, "learning_rate": 0.0005, "loss": 0.9693, "step": 1300 }, { "epoch": 8.439490445859873, "grad_norm": 5.03961181640625, "learning_rate": 0.0005, "loss": 0.9993, "step": 1325 }, { "epoch": 8.598726114649681, "grad_norm": 6.147229194641113, "learning_rate": 0.0005, "loss": 1.0598, "step": 1350 }, { "epoch": 8.757961783439491, "grad_norm": 7.066701889038086, "learning_rate": 0.0005, "loss": 1.0835, "step": 1375 }, { "epoch": 8.9171974522293, "grad_norm": 5.424177646636963, "learning_rate": 0.0005, "loss": 1.1042, "step": 1400 }, { "epoch": 9.07643312101911, "grad_norm": 5.849576473236084, "learning_rate": 0.0005, "loss": 0.9797, "step": 1425 }, { "epoch": 9.235668789808917, "grad_norm": 4.9155120849609375, "learning_rate": 0.0005, "loss": 0.8813, "step": 1450 }, { "epoch": 9.394904458598726, "grad_norm": 6.510490417480469, "learning_rate": 0.0005, "loss": 0.9536, "step": 1475 }, { "epoch": 9.554140127388536, "grad_norm": 5.549797058105469, "learning_rate": 0.0005, "loss": 0.9735, "step": 1500 }, { "epoch": 9.554140127388536, "eval_loss": 4.821861267089844, "eval_runtime": 286.5747, "eval_samples_per_second": 2.303, "eval_steps_per_second": 0.147, "eval_wer": 103.04656064380148, "eval_wer_ortho": 104.1015625, "step": 1500 }, { "epoch": 9.713375796178344, "grad_norm": 5.422865390777588, "learning_rate": 0.0005, "loss": 1.0079, "step": 1525 }, { "epoch": 9.872611464968152, "grad_norm": 5.475556373596191, "learning_rate": 0.0005, "loss": 1.0126, "step": 1550 }, { "epoch": 10.031847133757962, "grad_norm": 5.356685161590576, "learning_rate": 0.0005, "loss": 0.9727, "step": 1575 }, { "epoch": 10.19108280254777, "grad_norm": 5.031153202056885, "learning_rate": 0.0005, "loss": 0.8431, "step": 1600 }, { "epoch": 10.35031847133758, "grad_norm": 5.827383518218994, "learning_rate": 0.0005, "loss": 0.8888, "step": 1625 }, { "epoch": 10.509554140127388, "grad_norm": 5.030758857727051, "learning_rate": 0.0005, "loss": 0.9165, "step": 1650 }, { "epoch": 10.668789808917197, "grad_norm": 5.023013114929199, "learning_rate": 0.0005, "loss": 0.9541, "step": 1675 }, { "epoch": 10.828025477707007, "grad_norm": 5.8825602531433105, "learning_rate": 0.0005, "loss": 0.9576, "step": 1700 }, { "epoch": 10.987261146496815, "grad_norm": 5.114201068878174, "learning_rate": 0.0005, "loss": 0.9793, "step": 1725 }, { "epoch": 11.146496815286625, "grad_norm": 4.3722333908081055, "learning_rate": 0.0005, "loss": 0.7909, "step": 1750 }, { "epoch": 11.305732484076433, "grad_norm": 4.9860382080078125, "learning_rate": 0.0005, "loss": 0.858, "step": 1775 }, { "epoch": 11.464968152866241, "grad_norm": 5.144904613494873, "learning_rate": 0.0005, "loss": 0.8842, "step": 1800 }, { "epoch": 11.624203821656051, "grad_norm": 4.30189847946167, "learning_rate": 0.0005, "loss": 0.8925, "step": 1825 }, { "epoch": 11.78343949044586, "grad_norm": 5.091893672943115, "learning_rate": 0.0005, "loss": 0.9188, "step": 1850 }, { "epoch": 11.94267515923567, "grad_norm": 5.179553508758545, "learning_rate": 0.0005, "loss": 0.9309, "step": 1875 }, { "epoch": 12.101910828025478, "grad_norm": 5.5085225105285645, "learning_rate": 0.0005, "loss": 0.8213, "step": 1900 }, { "epoch": 12.261146496815286, "grad_norm": 5.253794193267822, "learning_rate": 0.0005, "loss": 0.8155, "step": 1925 }, { "epoch": 12.420382165605096, "grad_norm": 4.998741149902344, "learning_rate": 0.0005, "loss": 0.8479, "step": 1950 }, { "epoch": 12.579617834394904, "grad_norm": 5.674376010894775, "learning_rate": 0.0005, "loss": 0.8773, "step": 1975 }, { "epoch": 12.738853503184714, "grad_norm": 5.010542869567871, "learning_rate": 0.0005, "loss": 0.8844, "step": 2000 }, { "epoch": 12.738853503184714, "eval_loss": 5.002076148986816, "eval_runtime": 288.6571, "eval_samples_per_second": 2.286, "eval_steps_per_second": 0.146, "eval_wer": 109.04387813757425, "eval_wer_ortho": 108.59375, "step": 2000 } ], "logging_steps": 25, "max_steps": 5652, "num_input_tokens_seen": 0, "num_train_epochs": 36, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.22088071102464e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }