|
{ |
|
"best_metric": 0.7473347783088684, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bigcgen-female-5hrs-model/checkpoint-1200", |
|
"epoch": 6.779661016949152, |
|
"eval_steps": 100, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.423728813559322, |
|
"eval_loss": 3.731572389602661, |
|
"eval_runtime": 24.6238, |
|
"eval_samples_per_second": 13.605, |
|
"eval_steps_per_second": 3.411, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"eval_loss": 2.833482027053833, |
|
"eval_runtime": 24.2449, |
|
"eval_samples_per_second": 13.817, |
|
"eval_steps_per_second": 3.465, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"eval_loss": 2.0309665203094482, |
|
"eval_runtime": 24.3043, |
|
"eval_samples_per_second": 13.784, |
|
"eval_steps_per_second": 3.456, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"eval_loss": 1.1695457696914673, |
|
"eval_runtime": 24.5393, |
|
"eval_samples_per_second": 13.652, |
|
"eval_steps_per_second": 3.423, |
|
"eval_wer": 0.9975206611570248, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 5.734997272491455, |
|
"learning_rate": 3e-05, |
|
"loss": 5.5638, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"eval_loss": 1.157854676246643, |
|
"eval_runtime": 25.7403, |
|
"eval_samples_per_second": 13.015, |
|
"eval_steps_per_second": 3.263, |
|
"eval_wer": 0.9917355371900827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"eval_loss": 1.117336392402649, |
|
"eval_runtime": 24.578, |
|
"eval_samples_per_second": 13.63, |
|
"eval_steps_per_second": 3.418, |
|
"eval_wer": 0.9931129476584022, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.9661016949152543, |
|
"eval_loss": 0.9185360074043274, |
|
"eval_runtime": 24.6366, |
|
"eval_samples_per_second": 13.598, |
|
"eval_steps_per_second": 3.41, |
|
"eval_wer": 0.7611570247933884, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"eval_loss": 0.8460559248924255, |
|
"eval_runtime": 24.6486, |
|
"eval_samples_per_second": 13.591, |
|
"eval_steps_per_second": 3.408, |
|
"eval_wer": 0.8633608815426997, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.8135593220338984, |
|
"eval_loss": 0.7551252841949463, |
|
"eval_runtime": 24.7997, |
|
"eval_samples_per_second": 13.508, |
|
"eval_steps_per_second": 3.387, |
|
"eval_wer": 0.7391184573002755, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"grad_norm": 2.491547107696533, |
|
"learning_rate": 2.7720364741641338e-05, |
|
"loss": 1.2967, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.237288135593221, |
|
"eval_loss": 0.8461477756500244, |
|
"eval_runtime": 24.6325, |
|
"eval_samples_per_second": 13.6, |
|
"eval_steps_per_second": 3.41, |
|
"eval_wer": 0.8179063360881542, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.661016949152542, |
|
"eval_loss": 1.0125970840454102, |
|
"eval_runtime": 24.7518, |
|
"eval_samples_per_second": 13.534, |
|
"eval_steps_per_second": 3.394, |
|
"eval_wer": 0.931129476584022, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"eval_loss": 0.7473347783088684, |
|
"eval_runtime": 24.5889, |
|
"eval_samples_per_second": 13.624, |
|
"eval_steps_per_second": 3.416, |
|
"eval_wer": 0.7077134986225895, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.508474576271187, |
|
"eval_loss": 0.7741428017616272, |
|
"eval_runtime": 24.7431, |
|
"eval_samples_per_second": 13.539, |
|
"eval_steps_per_second": 3.395, |
|
"eval_wer": 0.7567493112947659, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.932203389830509, |
|
"eval_loss": 0.8527082204818726, |
|
"eval_runtime": 24.6476, |
|
"eval_samples_per_second": 13.592, |
|
"eval_steps_per_second": 3.408, |
|
"eval_wer": 0.7520661157024794, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.3559322033898304, |
|
"grad_norm": 7.505873203277588, |
|
"learning_rate": 2.5440729483282676e-05, |
|
"loss": 0.8911, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.3559322033898304, |
|
"eval_loss": 0.9528570771217346, |
|
"eval_runtime": 24.4492, |
|
"eval_samples_per_second": 13.702, |
|
"eval_steps_per_second": 3.436, |
|
"eval_wer": 0.8052341597796143, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"eval_loss": 1.0234959125518799, |
|
"eval_runtime": 24.4889, |
|
"eval_samples_per_second": 13.68, |
|
"eval_steps_per_second": 3.43, |
|
"eval_wer": 0.8308539944903581, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"step": 1600, |
|
"total_flos": 9.408039870276794e+18, |
|
"train_loss": 2.461955771446228, |
|
"train_runtime": 2270.622, |
|
"train_samples_per_second": 24.931, |
|
"train_steps_per_second": 3.118 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7080, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.408039870276794e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|