{ "best_metric": 0.42670491337776184, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-combined-model/checkpoint-2900", "epoch": 13.008130081300813, "eval_steps": 100, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4065040650406504, "grad_norm": 5.229663372039795, "learning_rate": 0.00029099999999999997, "loss": 6.5686, "step": 100 }, { "epoch": 0.4065040650406504, "eval_loss": 3.0826728343963623, "eval_runtime": 40.6972, "eval_samples_per_second": 13.588, "eval_steps_per_second": 3.415, "eval_wer": 0.970125974805039, "step": 100 }, { "epoch": 0.8130081300813008, "grad_norm": 2.938697576522827, "learning_rate": 0.0002960027472527472, "loss": 2.6223, "step": 200 }, { "epoch": 0.8130081300813008, "eval_loss": 2.23785662651062, "eval_runtime": 40.5049, "eval_samples_per_second": 13.653, "eval_steps_per_second": 3.432, "eval_wer": 0.9112177564487103, "step": 200 }, { "epoch": 1.2195121951219512, "grad_norm": 2.3466386795043945, "learning_rate": 0.0002918818681318681, "loss": 1.4386, "step": 300 }, { "epoch": 1.2195121951219512, "eval_loss": 0.6909840106964111, "eval_runtime": 40.4202, "eval_samples_per_second": 13.681, "eval_steps_per_second": 3.439, "eval_wer": 0.7809238152369526, "step": 300 }, { "epoch": 1.6260162601626016, "grad_norm": 9.536104202270508, "learning_rate": 0.000287760989010989, "loss": 0.8073, "step": 400 }, { "epoch": 1.6260162601626016, "eval_loss": 0.5903483629226685, "eval_runtime": 40.294, "eval_samples_per_second": 13.724, "eval_steps_per_second": 3.45, "eval_wer": 0.5698860227954409, "step": 400 }, { "epoch": 2.032520325203252, "grad_norm": 1.1359059810638428, "learning_rate": 0.00028364010989010986, "loss": 0.651, "step": 500 }, { "epoch": 2.032520325203252, "eval_loss": 0.5555368065834045, "eval_runtime": 40.6146, "eval_samples_per_second": 13.616, "eval_steps_per_second": 3.422, "eval_wer": 0.5036592681463707, "step": 500 }, { "epoch": 2.4390243902439024, "grad_norm": 1.2806068658828735, "learning_rate": 0.00027951923076923076, "loss": 0.655, "step": 600 }, { "epoch": 2.4390243902439024, "eval_loss": 0.529751718044281, "eval_runtime": 40.5683, "eval_samples_per_second": 13.631, "eval_steps_per_second": 3.426, "eval_wer": 0.4818236352729454, "step": 600 }, { "epoch": 2.845528455284553, "grad_norm": 0.6596723198890686, "learning_rate": 0.0002753983516483516, "loss": 0.6579, "step": 700 }, { "epoch": 2.845528455284553, "eval_loss": 0.5297980308532715, "eval_runtime": 40.2452, "eval_samples_per_second": 13.741, "eval_steps_per_second": 3.454, "eval_wer": 0.4603479304139172, "step": 700 }, { "epoch": 3.252032520325203, "grad_norm": 1.53997802734375, "learning_rate": 0.0002712774725274725, "loss": 0.5699, "step": 800 }, { "epoch": 3.252032520325203, "eval_loss": 0.5159797072410583, "eval_runtime": 40.3095, "eval_samples_per_second": 13.719, "eval_steps_per_second": 3.448, "eval_wer": 0.4284343131373725, "step": 800 }, { "epoch": 3.658536585365854, "grad_norm": 5.62721061706543, "learning_rate": 0.00026715659340659334, "loss": 0.6104, "step": 900 }, { "epoch": 3.658536585365854, "eval_loss": 0.5069619417190552, "eval_runtime": 40.7288, "eval_samples_per_second": 13.578, "eval_steps_per_second": 3.413, "eval_wer": 0.4320335932813437, "step": 900 }, { "epoch": 4.065040650406504, "grad_norm": 0.9546043276786804, "learning_rate": 0.00026303571428571424, "loss": 0.604, "step": 1000 }, { "epoch": 4.065040650406504, "eval_loss": 0.49775609374046326, "eval_runtime": 40.5226, "eval_samples_per_second": 13.647, "eval_steps_per_second": 3.43, "eval_wer": 0.4098380323935213, "step": 1000 }, { "epoch": 4.471544715447155, "grad_norm": 1.088158130645752, "learning_rate": 0.00025895604395604396, "loss": 0.5681, "step": 1100 }, { "epoch": 4.471544715447155, "eval_loss": 0.49754026532173157, "eval_runtime": 40.4172, "eval_samples_per_second": 13.682, "eval_steps_per_second": 3.439, "eval_wer": 0.40719856028794243, "step": 1100 }, { "epoch": 4.878048780487805, "grad_norm": 1.1959774494171143, "learning_rate": 0.0002548351648351648, "loss": 0.5493, "step": 1200 }, { "epoch": 4.878048780487805, "eval_loss": 0.4878113865852356, "eval_runtime": 40.4064, "eval_samples_per_second": 13.686, "eval_steps_per_second": 3.44, "eval_wer": 0.40383923215356926, "step": 1200 }, { "epoch": 5.284552845528455, "grad_norm": 0.7603219747543335, "learning_rate": 0.0002507142857142857, "loss": 0.581, "step": 1300 }, { "epoch": 5.284552845528455, "eval_loss": 0.48260873556137085, "eval_runtime": 40.7585, "eval_samples_per_second": 13.568, "eval_steps_per_second": 3.41, "eval_wer": 0.39652069586082783, "step": 1300 }, { "epoch": 5.691056910569106, "grad_norm": 1.554537296295166, "learning_rate": 0.00024659340659340654, "loss": 0.5746, "step": 1400 }, { "epoch": 5.691056910569106, "eval_loss": 0.47932884097099304, "eval_runtime": 41.051, "eval_samples_per_second": 13.471, "eval_steps_per_second": 3.386, "eval_wer": 0.4242351529694061, "step": 1400 }, { "epoch": 6.097560975609756, "grad_norm": 1.0625553131103516, "learning_rate": 0.00024247252747252747, "loss": 0.5238, "step": 1500 }, { "epoch": 6.097560975609756, "eval_loss": 0.472389817237854, "eval_runtime": 40.6042, "eval_samples_per_second": 13.619, "eval_steps_per_second": 3.423, "eval_wer": 0.3833233353329334, "step": 1500 }, { "epoch": 6.504065040650406, "grad_norm": 1.2422752380371094, "learning_rate": 0.0002383516483516483, "loss": 0.5204, "step": 1600 }, { "epoch": 6.504065040650406, "eval_loss": 0.48664653301239014, "eval_runtime": 40.5474, "eval_samples_per_second": 13.638, "eval_steps_per_second": 3.428, "eval_wer": 0.3864427114577085, "step": 1600 }, { "epoch": 6.9105691056910565, "grad_norm": 1.324337363243103, "learning_rate": 0.000234271978021978, "loss": 0.5563, "step": 1700 }, { "epoch": 6.9105691056910565, "eval_loss": 0.46718040108680725, "eval_runtime": 40.8357, "eval_samples_per_second": 13.542, "eval_steps_per_second": 3.404, "eval_wer": 0.38392321535692864, "step": 1700 }, { "epoch": 7.317073170731708, "grad_norm": 1.2208648920059204, "learning_rate": 0.0002301510989010989, "loss": 0.5121, "step": 1800 }, { "epoch": 7.317073170731708, "eval_loss": 0.466389536857605, "eval_runtime": 40.8903, "eval_samples_per_second": 13.524, "eval_steps_per_second": 3.399, "eval_wer": 0.3719256148770246, "step": 1800 }, { "epoch": 7.723577235772358, "grad_norm": 1.342207908630371, "learning_rate": 0.00022603021978021977, "loss": 0.4774, "step": 1900 }, { "epoch": 7.723577235772358, "eval_loss": 0.4625222682952881, "eval_runtime": 40.6308, "eval_samples_per_second": 13.61, "eval_steps_per_second": 3.421, "eval_wer": 0.36520695860827834, "step": 1900 }, { "epoch": 8.130081300813009, "grad_norm": 1.1112520694732666, "learning_rate": 0.00022190934065934064, "loss": 0.5356, "step": 2000 }, { "epoch": 8.130081300813009, "eval_loss": 0.4720795750617981, "eval_runtime": 40.7009, "eval_samples_per_second": 13.587, "eval_steps_per_second": 3.415, "eval_wer": 0.3692861427714457, "step": 2000 }, { "epoch": 8.536585365853659, "grad_norm": 1.3869178295135498, "learning_rate": 0.0002177884615384615, "loss": 0.4385, "step": 2100 }, { "epoch": 8.536585365853659, "eval_loss": 0.4559965133666992, "eval_runtime": 40.8418, "eval_samples_per_second": 13.54, "eval_steps_per_second": 3.403, "eval_wer": 0.3695260947810438, "step": 2100 }, { "epoch": 8.94308943089431, "grad_norm": 1.1107573509216309, "learning_rate": 0.0002136675824175824, "loss": 0.5561, "step": 2200 }, { "epoch": 8.94308943089431, "eval_loss": 0.4452870488166809, "eval_runtime": 40.9093, "eval_samples_per_second": 13.518, "eval_steps_per_second": 3.398, "eval_wer": 0.3594481103779244, "step": 2200 }, { "epoch": 9.34959349593496, "grad_norm": 1.0222277641296387, "learning_rate": 0.00020954670329670325, "loss": 0.414, "step": 2300 }, { "epoch": 9.34959349593496, "eval_loss": 0.44891127943992615, "eval_runtime": 40.5136, "eval_samples_per_second": 13.65, "eval_steps_per_second": 3.431, "eval_wer": 0.3546490701859628, "step": 2300 }, { "epoch": 9.75609756097561, "grad_norm": 1.8858979940414429, "learning_rate": 0.00020542582417582415, "loss": 0.4763, "step": 2400 }, { "epoch": 9.75609756097561, "eval_loss": 0.4524897038936615, "eval_runtime": 40.6457, "eval_samples_per_second": 13.605, "eval_steps_per_second": 3.42, "eval_wer": 0.352129574085183, "step": 2400 }, { "epoch": 10.16260162601626, "grad_norm": 1.759278655052185, "learning_rate": 0.00020130494505494502, "loss": 0.5317, "step": 2500 }, { "epoch": 10.16260162601626, "eval_loss": 0.44243165850639343, "eval_runtime": 40.8637, "eval_samples_per_second": 13.533, "eval_steps_per_second": 3.402, "eval_wer": 0.3557288542291542, "step": 2500 }, { "epoch": 10.56910569105691, "grad_norm": 1.8020973205566406, "learning_rate": 0.00019718406593406592, "loss": 0.4939, "step": 2600 }, { "epoch": 10.56910569105691, "eval_loss": 0.43977972865104675, "eval_runtime": 40.8341, "eval_samples_per_second": 13.543, "eval_steps_per_second": 3.404, "eval_wer": 0.3502099580083983, "step": 2600 }, { "epoch": 10.975609756097562, "grad_norm": 1.453413724899292, "learning_rate": 0.00019306318681318682, "loss": 0.4456, "step": 2700 }, { "epoch": 10.975609756097562, "eval_loss": 0.4414619505405426, "eval_runtime": 40.6722, "eval_samples_per_second": 13.597, "eval_steps_per_second": 3.418, "eval_wer": 0.34673065386922614, "step": 2700 }, { "epoch": 11.382113821138212, "grad_norm": 1.151430606842041, "learning_rate": 0.00018898351648351645, "loss": 0.4583, "step": 2800 }, { "epoch": 11.382113821138212, "eval_loss": 0.4501504600048065, "eval_runtime": 40.6207, "eval_samples_per_second": 13.614, "eval_steps_per_second": 3.422, "eval_wer": 0.34457108578284346, "step": 2800 }, { "epoch": 11.788617886178862, "grad_norm": 4.184691429138184, "learning_rate": 0.00018486263736263735, "loss": 0.4573, "step": 2900 }, { "epoch": 11.788617886178862, "eval_loss": 0.42670491337776184, "eval_runtime": 40.8415, "eval_samples_per_second": 13.54, "eval_steps_per_second": 3.403, "eval_wer": 0.340251949610078, "step": 2900 }, { "epoch": 12.195121951219512, "grad_norm": 16.549129486083984, "learning_rate": 0.00018074175824175822, "loss": 0.398, "step": 3000 }, { "epoch": 12.195121951219512, "eval_loss": 0.4304564595222473, "eval_runtime": 40.8815, "eval_samples_per_second": 13.527, "eval_steps_per_second": 3.4, "eval_wer": 0.3406118776244751, "step": 3000 }, { "epoch": 12.601626016260163, "grad_norm": 3.6663782596588135, "learning_rate": 0.00017662087912087912, "loss": 0.472, "step": 3100 }, { "epoch": 12.601626016260163, "eval_loss": 0.42684391140937805, "eval_runtime": 40.8829, "eval_samples_per_second": 13.526, "eval_steps_per_second": 3.4, "eval_wer": 0.3319736052789442, "step": 3100 }, { "epoch": 13.008130081300813, "grad_norm": 0.5571214556694031, "learning_rate": 0.00017249999999999996, "loss": 0.3993, "step": 3200 }, { "epoch": 13.008130081300813, "eval_loss": 0.42878594994544983, "eval_runtime": 40.7789, "eval_samples_per_second": 13.561, "eval_steps_per_second": 3.409, "eval_wer": 0.32969406118776245, "step": 3200 }, { "epoch": 13.008130081300813, "step": 3200, "total_flos": 9.286133731448259e+18, "train_loss": 0.8163932430744171, "train_runtime": 3897.3821, "train_samples_per_second": 7.559, "train_steps_per_second": 1.894 } ], "logging_steps": 100, "max_steps": 7380, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.286133731448259e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }