|
{ |
|
"best_metric": 0.42670491337776184, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-combined-model/checkpoint-2900", |
|
"epoch": 13.008130081300813, |
|
"eval_steps": 100, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 5.229663372039795, |
|
"learning_rate": 0.00029099999999999997, |
|
"loss": 6.5686, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 3.0826728343963623, |
|
"eval_runtime": 40.6972, |
|
"eval_samples_per_second": 13.588, |
|
"eval_steps_per_second": 3.415, |
|
"eval_wer": 0.970125974805039, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 2.938697576522827, |
|
"learning_rate": 0.0002960027472527472, |
|
"loss": 2.6223, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 2.23785662651062, |
|
"eval_runtime": 40.5049, |
|
"eval_samples_per_second": 13.653, |
|
"eval_steps_per_second": 3.432, |
|
"eval_wer": 0.9112177564487103, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 2.3466386795043945, |
|
"learning_rate": 0.0002918818681318681, |
|
"loss": 1.4386, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.6909840106964111, |
|
"eval_runtime": 40.4202, |
|
"eval_samples_per_second": 13.681, |
|
"eval_steps_per_second": 3.439, |
|
"eval_wer": 0.7809238152369526, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 9.536104202270508, |
|
"learning_rate": 0.000287760989010989, |
|
"loss": 0.8073, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.5903483629226685, |
|
"eval_runtime": 40.294, |
|
"eval_samples_per_second": 13.724, |
|
"eval_steps_per_second": 3.45, |
|
"eval_wer": 0.5698860227954409, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 1.1359059810638428, |
|
"learning_rate": 0.00028364010989010986, |
|
"loss": 0.651, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"eval_loss": 0.5555368065834045, |
|
"eval_runtime": 40.6146, |
|
"eval_samples_per_second": 13.616, |
|
"eval_steps_per_second": 3.422, |
|
"eval_wer": 0.5036592681463707, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 1.2806068658828735, |
|
"learning_rate": 0.00027951923076923076, |
|
"loss": 0.655, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"eval_loss": 0.529751718044281, |
|
"eval_runtime": 40.5683, |
|
"eval_samples_per_second": 13.631, |
|
"eval_steps_per_second": 3.426, |
|
"eval_wer": 0.4818236352729454, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"grad_norm": 0.6596723198890686, |
|
"learning_rate": 0.0002753983516483516, |
|
"loss": 0.6579, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"eval_loss": 0.5297980308532715, |
|
"eval_runtime": 40.2452, |
|
"eval_samples_per_second": 13.741, |
|
"eval_steps_per_second": 3.454, |
|
"eval_wer": 0.4603479304139172, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"grad_norm": 1.53997802734375, |
|
"learning_rate": 0.0002712774725274725, |
|
"loss": 0.5699, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"eval_loss": 0.5159797072410583, |
|
"eval_runtime": 40.3095, |
|
"eval_samples_per_second": 13.719, |
|
"eval_steps_per_second": 3.448, |
|
"eval_wer": 0.4284343131373725, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"grad_norm": 5.62721061706543, |
|
"learning_rate": 0.00026715659340659334, |
|
"loss": 0.6104, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"eval_loss": 0.5069619417190552, |
|
"eval_runtime": 40.7288, |
|
"eval_samples_per_second": 13.578, |
|
"eval_steps_per_second": 3.413, |
|
"eval_wer": 0.4320335932813437, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"grad_norm": 0.9546043276786804, |
|
"learning_rate": 0.00026303571428571424, |
|
"loss": 0.604, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"eval_loss": 0.49775609374046326, |
|
"eval_runtime": 40.5226, |
|
"eval_samples_per_second": 13.647, |
|
"eval_steps_per_second": 3.43, |
|
"eval_wer": 0.4098380323935213, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.471544715447155, |
|
"grad_norm": 1.088158130645752, |
|
"learning_rate": 0.00025895604395604396, |
|
"loss": 0.5681, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.471544715447155, |
|
"eval_loss": 0.49754026532173157, |
|
"eval_runtime": 40.4172, |
|
"eval_samples_per_second": 13.682, |
|
"eval_steps_per_second": 3.439, |
|
"eval_wer": 0.40719856028794243, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"grad_norm": 1.1959774494171143, |
|
"learning_rate": 0.0002548351648351648, |
|
"loss": 0.5493, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"eval_loss": 0.4878113865852356, |
|
"eval_runtime": 40.4064, |
|
"eval_samples_per_second": 13.686, |
|
"eval_steps_per_second": 3.44, |
|
"eval_wer": 0.40383923215356926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.284552845528455, |
|
"grad_norm": 0.7603219747543335, |
|
"learning_rate": 0.0002507142857142857, |
|
"loss": 0.581, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.284552845528455, |
|
"eval_loss": 0.48260873556137085, |
|
"eval_runtime": 40.7585, |
|
"eval_samples_per_second": 13.568, |
|
"eval_steps_per_second": 3.41, |
|
"eval_wer": 0.39652069586082783, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.691056910569106, |
|
"grad_norm": 1.554537296295166, |
|
"learning_rate": 0.00024659340659340654, |
|
"loss": 0.5746, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.691056910569106, |
|
"eval_loss": 0.47932884097099304, |
|
"eval_runtime": 41.051, |
|
"eval_samples_per_second": 13.471, |
|
"eval_steps_per_second": 3.386, |
|
"eval_wer": 0.4242351529694061, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.097560975609756, |
|
"grad_norm": 1.0625553131103516, |
|
"learning_rate": 0.00024247252747252747, |
|
"loss": 0.5238, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.097560975609756, |
|
"eval_loss": 0.472389817237854, |
|
"eval_runtime": 40.6042, |
|
"eval_samples_per_second": 13.619, |
|
"eval_steps_per_second": 3.423, |
|
"eval_wer": 0.3833233353329334, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.504065040650406, |
|
"grad_norm": 1.2422752380371094, |
|
"learning_rate": 0.0002383516483516483, |
|
"loss": 0.5204, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.504065040650406, |
|
"eval_loss": 0.48664653301239014, |
|
"eval_runtime": 40.5474, |
|
"eval_samples_per_second": 13.638, |
|
"eval_steps_per_second": 3.428, |
|
"eval_wer": 0.3864427114577085, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.9105691056910565, |
|
"grad_norm": 1.324337363243103, |
|
"learning_rate": 0.000234271978021978, |
|
"loss": 0.5563, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.9105691056910565, |
|
"eval_loss": 0.46718040108680725, |
|
"eval_runtime": 40.8357, |
|
"eval_samples_per_second": 13.542, |
|
"eval_steps_per_second": 3.404, |
|
"eval_wer": 0.38392321535692864, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.317073170731708, |
|
"grad_norm": 1.2208648920059204, |
|
"learning_rate": 0.0002301510989010989, |
|
"loss": 0.5121, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.317073170731708, |
|
"eval_loss": 0.466389536857605, |
|
"eval_runtime": 40.8903, |
|
"eval_samples_per_second": 13.524, |
|
"eval_steps_per_second": 3.399, |
|
"eval_wer": 0.3719256148770246, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.723577235772358, |
|
"grad_norm": 1.342207908630371, |
|
"learning_rate": 0.00022603021978021977, |
|
"loss": 0.4774, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.723577235772358, |
|
"eval_loss": 0.4625222682952881, |
|
"eval_runtime": 40.6308, |
|
"eval_samples_per_second": 13.61, |
|
"eval_steps_per_second": 3.421, |
|
"eval_wer": 0.36520695860827834, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.130081300813009, |
|
"grad_norm": 1.1112520694732666, |
|
"learning_rate": 0.00022190934065934064, |
|
"loss": 0.5356, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.130081300813009, |
|
"eval_loss": 0.4720795750617981, |
|
"eval_runtime": 40.7009, |
|
"eval_samples_per_second": 13.587, |
|
"eval_steps_per_second": 3.415, |
|
"eval_wer": 0.3692861427714457, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.536585365853659, |
|
"grad_norm": 1.3869178295135498, |
|
"learning_rate": 0.0002177884615384615, |
|
"loss": 0.4385, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.536585365853659, |
|
"eval_loss": 0.4559965133666992, |
|
"eval_runtime": 40.8418, |
|
"eval_samples_per_second": 13.54, |
|
"eval_steps_per_second": 3.403, |
|
"eval_wer": 0.3695260947810438, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.94308943089431, |
|
"grad_norm": 1.1107573509216309, |
|
"learning_rate": 0.0002136675824175824, |
|
"loss": 0.5561, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.94308943089431, |
|
"eval_loss": 0.4452870488166809, |
|
"eval_runtime": 40.9093, |
|
"eval_samples_per_second": 13.518, |
|
"eval_steps_per_second": 3.398, |
|
"eval_wer": 0.3594481103779244, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.34959349593496, |
|
"grad_norm": 1.0222277641296387, |
|
"learning_rate": 0.00020954670329670325, |
|
"loss": 0.414, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.34959349593496, |
|
"eval_loss": 0.44891127943992615, |
|
"eval_runtime": 40.5136, |
|
"eval_samples_per_second": 13.65, |
|
"eval_steps_per_second": 3.431, |
|
"eval_wer": 0.3546490701859628, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.75609756097561, |
|
"grad_norm": 1.8858979940414429, |
|
"learning_rate": 0.00020542582417582415, |
|
"loss": 0.4763, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.75609756097561, |
|
"eval_loss": 0.4524897038936615, |
|
"eval_runtime": 40.6457, |
|
"eval_samples_per_second": 13.605, |
|
"eval_steps_per_second": 3.42, |
|
"eval_wer": 0.352129574085183, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.16260162601626, |
|
"grad_norm": 1.759278655052185, |
|
"learning_rate": 0.00020130494505494502, |
|
"loss": 0.5317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.16260162601626, |
|
"eval_loss": 0.44243165850639343, |
|
"eval_runtime": 40.8637, |
|
"eval_samples_per_second": 13.533, |
|
"eval_steps_per_second": 3.402, |
|
"eval_wer": 0.3557288542291542, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.56910569105691, |
|
"grad_norm": 1.8020973205566406, |
|
"learning_rate": 0.00019718406593406592, |
|
"loss": 0.4939, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.56910569105691, |
|
"eval_loss": 0.43977972865104675, |
|
"eval_runtime": 40.8341, |
|
"eval_samples_per_second": 13.543, |
|
"eval_steps_per_second": 3.404, |
|
"eval_wer": 0.3502099580083983, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.975609756097562, |
|
"grad_norm": 1.453413724899292, |
|
"learning_rate": 0.00019306318681318682, |
|
"loss": 0.4456, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.975609756097562, |
|
"eval_loss": 0.4414619505405426, |
|
"eval_runtime": 40.6722, |
|
"eval_samples_per_second": 13.597, |
|
"eval_steps_per_second": 3.418, |
|
"eval_wer": 0.34673065386922614, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11.382113821138212, |
|
"grad_norm": 1.151430606842041, |
|
"learning_rate": 0.00018898351648351645, |
|
"loss": 0.4583, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.382113821138212, |
|
"eval_loss": 0.4501504600048065, |
|
"eval_runtime": 40.6207, |
|
"eval_samples_per_second": 13.614, |
|
"eval_steps_per_second": 3.422, |
|
"eval_wer": 0.34457108578284346, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.788617886178862, |
|
"grad_norm": 4.184691429138184, |
|
"learning_rate": 0.00018486263736263735, |
|
"loss": 0.4573, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.788617886178862, |
|
"eval_loss": 0.42670491337776184, |
|
"eval_runtime": 40.8415, |
|
"eval_samples_per_second": 13.54, |
|
"eval_steps_per_second": 3.403, |
|
"eval_wer": 0.340251949610078, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"grad_norm": 16.549129486083984, |
|
"learning_rate": 0.00018074175824175822, |
|
"loss": 0.398, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"eval_loss": 0.4304564595222473, |
|
"eval_runtime": 40.8815, |
|
"eval_samples_per_second": 13.527, |
|
"eval_steps_per_second": 3.4, |
|
"eval_wer": 0.3406118776244751, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.601626016260163, |
|
"grad_norm": 3.6663782596588135, |
|
"learning_rate": 0.00017662087912087912, |
|
"loss": 0.472, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.601626016260163, |
|
"eval_loss": 0.42684391140937805, |
|
"eval_runtime": 40.8829, |
|
"eval_samples_per_second": 13.526, |
|
"eval_steps_per_second": 3.4, |
|
"eval_wer": 0.3319736052789442, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13.008130081300813, |
|
"grad_norm": 0.5571214556694031, |
|
"learning_rate": 0.00017249999999999996, |
|
"loss": 0.3993, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.008130081300813, |
|
"eval_loss": 0.42878594994544983, |
|
"eval_runtime": 40.7789, |
|
"eval_samples_per_second": 13.561, |
|
"eval_steps_per_second": 3.409, |
|
"eval_wer": 0.32969406118776245, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13.008130081300813, |
|
"step": 3200, |
|
"total_flos": 9.286133731448259e+18, |
|
"train_loss": 0.8163932430744171, |
|
"train_runtime": 3897.3821, |
|
"train_samples_per_second": 7.559, |
|
"train_steps_per_second": 1.894 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 7380, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.286133731448259e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|