mms-1b-lozgen-balanced-model / trainer_state.json
csikasote's picture
End of training
88fb2b7 verified
{
"best_metric": 0.5324379205703735,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-balanced-model/checkpoint-2300",
"epoch": 21.13821138211382,
"eval_steps": 100,
"global_step": 2600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8130081300813008,
"grad_norm": 2.741861343383789,
"learning_rate": 0.00028799999999999995,
"loss": 6.6518,
"step": 100
},
{
"epoch": 0.8130081300813008,
"eval_loss": 3.1670827865600586,
"eval_runtime": 20.9246,
"eval_samples_per_second": 13.142,
"eval_steps_per_second": 3.298,
"eval_wer": 0.9942789034564958,
"step": 100
},
{
"epoch": 1.6260162601626016,
"grad_norm": 1.7816331386566162,
"learning_rate": 0.0002919777158774373,
"loss": 2.6718,
"step": 200
},
{
"epoch": 1.6260162601626016,
"eval_loss": 2.2608625888824463,
"eval_runtime": 20.9525,
"eval_samples_per_second": 13.125,
"eval_steps_per_second": 3.293,
"eval_wer": 0.931585220500596,
"step": 200
},
{
"epoch": 2.4390243902439024,
"grad_norm": 1.4348615407943726,
"learning_rate": 0.0002836211699164345,
"loss": 1.4567,
"step": 300
},
{
"epoch": 2.4390243902439024,
"eval_loss": 0.740405261516571,
"eval_runtime": 20.9256,
"eval_samples_per_second": 13.142,
"eval_steps_per_second": 3.297,
"eval_wer": 0.7191895113230036,
"step": 300
},
{
"epoch": 3.252032520325203,
"grad_norm": 2.2145164012908936,
"learning_rate": 0.0002752646239554317,
"loss": 0.7044,
"step": 400
},
{
"epoch": 3.252032520325203,
"eval_loss": 0.6401560306549072,
"eval_runtime": 20.8861,
"eval_samples_per_second": 13.167,
"eval_steps_per_second": 3.304,
"eval_wer": 0.532061978545888,
"step": 400
},
{
"epoch": 4.065040650406504,
"grad_norm": 1.0280542373657227,
"learning_rate": 0.00026690807799442895,
"loss": 0.6221,
"step": 500
},
{
"epoch": 4.065040650406504,
"eval_loss": 0.6065016984939575,
"eval_runtime": 20.9642,
"eval_samples_per_second": 13.118,
"eval_steps_per_second": 3.291,
"eval_wer": 0.5165673420738975,
"step": 500
},
{
"epoch": 4.878048780487805,
"grad_norm": 1.9754825830459595,
"learning_rate": 0.00025855153203342614,
"loss": 0.6016,
"step": 600
},
{
"epoch": 4.878048780487805,
"eval_loss": 0.5947937965393066,
"eval_runtime": 20.9641,
"eval_samples_per_second": 13.118,
"eval_steps_per_second": 3.291,
"eval_wer": 0.4786650774731824,
"step": 600
},
{
"epoch": 5.691056910569106,
"grad_norm": 14.851791381835938,
"learning_rate": 0.0002502785515320334,
"loss": 0.5686,
"step": 700
},
{
"epoch": 5.691056910569106,
"eval_loss": 0.580583393573761,
"eval_runtime": 20.7413,
"eval_samples_per_second": 13.259,
"eval_steps_per_second": 3.327,
"eval_wer": 0.4641239570917759,
"step": 700
},
{
"epoch": 6.504065040650406,
"grad_norm": 0.918157696723938,
"learning_rate": 0.00024192200557103064,
"loss": 0.6054,
"step": 800
},
{
"epoch": 6.504065040650406,
"eval_loss": 0.5715970396995544,
"eval_runtime": 20.8551,
"eval_samples_per_second": 13.186,
"eval_steps_per_second": 3.309,
"eval_wer": 0.44862932061978544,
"step": 800
},
{
"epoch": 7.317073170731708,
"grad_norm": 1.2295010089874268,
"learning_rate": 0.00023356545961002783,
"loss": 0.4871,
"step": 900
},
{
"epoch": 7.317073170731708,
"eval_loss": 0.5732107162475586,
"eval_runtime": 20.9032,
"eval_samples_per_second": 13.156,
"eval_steps_per_second": 3.301,
"eval_wer": 0.4445768772348033,
"step": 900
},
{
"epoch": 8.130081300813009,
"grad_norm": 1.2786221504211426,
"learning_rate": 0.00022520891364902505,
"loss": 0.5275,
"step": 1000
},
{
"epoch": 8.130081300813009,
"eval_loss": 0.5667155385017395,
"eval_runtime": 20.9254,
"eval_samples_per_second": 13.142,
"eval_steps_per_second": 3.297,
"eval_wer": 0.43504171632896305,
"step": 1000
},
{
"epoch": 8.94308943089431,
"grad_norm": 2.170154571533203,
"learning_rate": 0.00021685236768802224,
"loss": 0.5199,
"step": 1100
},
{
"epoch": 8.94308943089431,
"eval_loss": 0.5688361525535583,
"eval_runtime": 20.7636,
"eval_samples_per_second": 13.244,
"eval_steps_per_second": 3.323,
"eval_wer": 0.4300357568533969,
"step": 1100
},
{
"epoch": 9.75609756097561,
"grad_norm": 0.6021554470062256,
"learning_rate": 0.0002084958217270195,
"loss": 0.5031,
"step": 1200
},
{
"epoch": 9.75609756097561,
"eval_loss": 0.5516279935836792,
"eval_runtime": 20.7896,
"eval_samples_per_second": 13.228,
"eval_steps_per_second": 3.319,
"eval_wer": 0.4443384982121573,
"step": 1200
},
{
"epoch": 10.56910569105691,
"grad_norm": 0.8330698609352112,
"learning_rate": 0.00020013927576601671,
"loss": 0.4533,
"step": 1300
},
{
"epoch": 10.56910569105691,
"eval_loss": 0.557715117931366,
"eval_runtime": 21.0144,
"eval_samples_per_second": 13.086,
"eval_steps_per_second": 3.283,
"eval_wer": 0.41787842669845054,
"step": 1300
},
{
"epoch": 11.382113821138212,
"grad_norm": 1.5642321109771729,
"learning_rate": 0.0001917827298050139,
"loss": 0.4738,
"step": 1400
},
{
"epoch": 11.382113821138212,
"eval_loss": 0.553620457649231,
"eval_runtime": 20.9609,
"eval_samples_per_second": 13.12,
"eval_steps_per_second": 3.292,
"eval_wer": 0.4057210965435042,
"step": 1400
},
{
"epoch": 12.195121951219512,
"grad_norm": 6.336544513702393,
"learning_rate": 0.00018342618384401113,
"loss": 0.4925,
"step": 1500
},
{
"epoch": 12.195121951219512,
"eval_loss": 0.5502671003341675,
"eval_runtime": 20.8854,
"eval_samples_per_second": 13.167,
"eval_steps_per_second": 3.304,
"eval_wer": 0.39690107270560193,
"step": 1500
},
{
"epoch": 13.008130081300813,
"grad_norm": 1.8177986145019531,
"learning_rate": 0.00017506963788300832,
"loss": 0.441,
"step": 1600
},
{
"epoch": 13.008130081300813,
"eval_loss": 0.5402783155441284,
"eval_runtime": 20.8181,
"eval_samples_per_second": 13.21,
"eval_steps_per_second": 3.314,
"eval_wer": 0.40047675804529204,
"step": 1600
},
{
"epoch": 13.821138211382113,
"grad_norm": 0.8598074316978455,
"learning_rate": 0.00016671309192200557,
"loss": 0.4177,
"step": 1700
},
{
"epoch": 13.821138211382113,
"eval_loss": 0.5563377737998962,
"eval_runtime": 20.9575,
"eval_samples_per_second": 13.122,
"eval_steps_per_second": 3.292,
"eval_wer": 0.3914183551847437,
"step": 1700
},
{
"epoch": 14.634146341463415,
"grad_norm": 2.3619320392608643,
"learning_rate": 0.00015835654596100277,
"loss": 0.4589,
"step": 1800
},
{
"epoch": 14.634146341463415,
"eval_loss": 0.5394493341445923,
"eval_runtime": 21.0011,
"eval_samples_per_second": 13.095,
"eval_steps_per_second": 3.286,
"eval_wer": 0.3876042908224076,
"step": 1800
},
{
"epoch": 15.447154471544716,
"grad_norm": 3.2565107345581055,
"learning_rate": 0.00015,
"loss": 0.4131,
"step": 1900
},
{
"epoch": 15.447154471544716,
"eval_loss": 0.5424718856811523,
"eval_runtime": 20.8003,
"eval_samples_per_second": 13.221,
"eval_steps_per_second": 3.317,
"eval_wer": 0.3957091775923719,
"step": 1900
},
{
"epoch": 16.260162601626018,
"grad_norm": 1.814285397529602,
"learning_rate": 0.0001416434540389972,
"loss": 0.393,
"step": 2000
},
{
"epoch": 16.260162601626018,
"eval_loss": 0.546908974647522,
"eval_runtime": 20.881,
"eval_samples_per_second": 13.17,
"eval_steps_per_second": 3.304,
"eval_wer": 0.3907032181168057,
"step": 2000
},
{
"epoch": 17.073170731707318,
"grad_norm": 0.762374997138977,
"learning_rate": 0.00013328690807799443,
"loss": 0.4235,
"step": 2100
},
{
"epoch": 17.073170731707318,
"eval_loss": 0.5356802940368652,
"eval_runtime": 20.9384,
"eval_samples_per_second": 13.134,
"eval_steps_per_second": 3.295,
"eval_wer": 0.3878426698450536,
"step": 2100
},
{
"epoch": 17.88617886178862,
"grad_norm": 0.8089348077774048,
"learning_rate": 0.00012493036211699162,
"loss": 0.4113,
"step": 2200
},
{
"epoch": 17.88617886178862,
"eval_loss": 0.5390728712081909,
"eval_runtime": 20.9645,
"eval_samples_per_second": 13.117,
"eval_steps_per_second": 3.291,
"eval_wer": 0.3802145411203814,
"step": 2200
},
{
"epoch": 18.69918699186992,
"grad_norm": 1.4261620044708252,
"learning_rate": 0.00011657381615598886,
"loss": 0.3781,
"step": 2300
},
{
"epoch": 18.69918699186992,
"eval_loss": 0.5324379205703735,
"eval_runtime": 20.8837,
"eval_samples_per_second": 13.168,
"eval_steps_per_second": 3.304,
"eval_wer": 0.3728247914183552,
"step": 2300
},
{
"epoch": 19.51219512195122,
"grad_norm": 0.8106199502944946,
"learning_rate": 0.00010821727019498607,
"loss": 0.3706,
"step": 2400
},
{
"epoch": 19.51219512195122,
"eval_loss": 0.5462723970413208,
"eval_runtime": 20.8289,
"eval_samples_per_second": 13.203,
"eval_steps_per_second": 3.313,
"eval_wer": 0.38259833134684146,
"step": 2400
},
{
"epoch": 20.32520325203252,
"grad_norm": 1.8274589776992798,
"learning_rate": 9.986072423398329e-05,
"loss": 0.3617,
"step": 2500
},
{
"epoch": 20.32520325203252,
"eval_loss": 0.5391108989715576,
"eval_runtime": 21.0365,
"eval_samples_per_second": 13.072,
"eval_steps_per_second": 3.28,
"eval_wer": 0.3697258641239571,
"step": 2500
},
{
"epoch": 21.13821138211382,
"grad_norm": 2.291131019592285,
"learning_rate": 9.15041782729805e-05,
"loss": 0.401,
"step": 2600
},
{
"epoch": 21.13821138211382,
"eval_loss": 0.5417346358299255,
"eval_runtime": 21.0259,
"eval_samples_per_second": 13.079,
"eval_steps_per_second": 3.282,
"eval_wer": 0.366388557806913,
"step": 2600
},
{
"epoch": 21.13821138211382,
"step": 2600,
"total_flos": 7.953394513554737e+18,
"train_loss": 0.8388280222966121,
"train_runtime": 2359.9883,
"train_samples_per_second": 6.242,
"train_steps_per_second": 1.564
}
],
"logging_steps": 100,
"max_steps": 3690,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.953394513554737e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}