|
{ |
|
"best_metric": 0.5856660604476929, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bigcgen-combined-15hrs-model/checkpoint-1200", |
|
"epoch": 2.2566995768688294, |
|
"eval_steps": 100, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14104372355430184, |
|
"eval_loss": 3.8893141746520996, |
|
"eval_runtime": 53.0152, |
|
"eval_samples_per_second": 12.864, |
|
"eval_steps_per_second": 3.225, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2820874471086037, |
|
"eval_loss": 2.6866326332092285, |
|
"eval_runtime": 52.8101, |
|
"eval_samples_per_second": 12.914, |
|
"eval_steps_per_second": 3.238, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4231311706629055, |
|
"eval_loss": 1.4204449653625488, |
|
"eval_runtime": 53.002, |
|
"eval_samples_per_second": 12.867, |
|
"eval_steps_per_second": 3.226, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5641748942172073, |
|
"eval_loss": 0.8780319094657898, |
|
"eval_runtime": 53.1195, |
|
"eval_samples_per_second": 12.839, |
|
"eval_steps_per_second": 3.219, |
|
"eval_wer": 0.8846789749242215, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7052186177715092, |
|
"grad_norm": 4.491549491882324, |
|
"learning_rate": 3e-05, |
|
"loss": 5.5784, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7052186177715092, |
|
"eval_loss": 0.8820701241493225, |
|
"eval_runtime": 53.4186, |
|
"eval_samples_per_second": 12.767, |
|
"eval_steps_per_second": 3.201, |
|
"eval_wer": 0.9582529622485533, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.846262341325811, |
|
"eval_loss": 0.6897755861282349, |
|
"eval_runtime": 53.3341, |
|
"eval_samples_per_second": 12.787, |
|
"eval_steps_per_second": 3.206, |
|
"eval_wer": 0.7508955635161202, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9873060648801129, |
|
"eval_loss": 0.6909680962562561, |
|
"eval_runtime": 53.3024, |
|
"eval_samples_per_second": 12.795, |
|
"eval_steps_per_second": 3.208, |
|
"eval_wer": 0.8689721686414991, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1283497884344147, |
|
"eval_loss": 0.6632267832756042, |
|
"eval_runtime": 53.2656, |
|
"eval_samples_per_second": 12.804, |
|
"eval_steps_per_second": 3.21, |
|
"eval_wer": 0.681041609258749, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2693935119887165, |
|
"eval_loss": 0.6164674758911133, |
|
"eval_runtime": 53.6138, |
|
"eval_samples_per_second": 12.721, |
|
"eval_steps_per_second": 3.189, |
|
"eval_wer": 0.6048498208872968, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4104372355430184, |
|
"grad_norm": 6.964908599853516, |
|
"learning_rate": 2.9277804525758308e-05, |
|
"loss": 1.2954, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4104372355430184, |
|
"eval_loss": 0.60063636302948, |
|
"eval_runtime": 53.8102, |
|
"eval_samples_per_second": 12.674, |
|
"eval_steps_per_second": 3.178, |
|
"eval_wer": 0.6133921190410582, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5514809590973202, |
|
"eval_loss": 0.6859056353569031, |
|
"eval_runtime": 53.6927, |
|
"eval_samples_per_second": 12.702, |
|
"eval_steps_per_second": 3.185, |
|
"eval_wer": 0.7683934968310829, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.692524682651622, |
|
"eval_loss": 0.5856660604476929, |
|
"eval_runtime": 53.9146, |
|
"eval_samples_per_second": 12.65, |
|
"eval_steps_per_second": 3.172, |
|
"eval_wer": 0.6273077982915404, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8335684062059239, |
|
"eval_loss": 0.6304970383644104, |
|
"eval_runtime": 53.5646, |
|
"eval_samples_per_second": 12.732, |
|
"eval_steps_per_second": 3.192, |
|
"eval_wer": 0.6154588040782585, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9746121297602257, |
|
"eval_loss": 0.6212508678436279, |
|
"eval_runtime": 53.3498, |
|
"eval_samples_per_second": 12.784, |
|
"eval_steps_per_second": 3.205, |
|
"eval_wer": 0.5855607605400936, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1156558533145273, |
|
"grad_norm": 4.57673978805542, |
|
"learning_rate": 2.855560905151661e-05, |
|
"loss": 1.1582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1156558533145273, |
|
"eval_loss": 0.5891425609588623, |
|
"eval_runtime": 53.7029, |
|
"eval_samples_per_second": 12.699, |
|
"eval_steps_per_second": 3.184, |
|
"eval_wer": 0.5983742077707357, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.2566995768688294, |
|
"eval_loss": 0.6605567932128906, |
|
"eval_runtime": 53.1853, |
|
"eval_samples_per_second": 12.823, |
|
"eval_steps_per_second": 3.215, |
|
"eval_wer": 0.7040507026729127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2566995768688294, |
|
"step": 1600, |
|
"total_flos": 9.492183580866103e+18, |
|
"train_loss": 2.5750148630142213, |
|
"train_runtime": 3143.2537, |
|
"train_samples_per_second": 54.106, |
|
"train_steps_per_second": 6.767 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21270, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.492183580866103e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|