{ "best_metric": 0.7473347783088684, "best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bigcgen-female-5hrs-model/checkpoint-1200", "epoch": 6.779661016949152, "eval_steps": 100, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.423728813559322, "eval_loss": 3.731572389602661, "eval_runtime": 24.6238, "eval_samples_per_second": 13.605, "eval_steps_per_second": 3.411, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.847457627118644, "eval_loss": 2.833482027053833, "eval_runtime": 24.2449, "eval_samples_per_second": 13.817, "eval_steps_per_second": 3.465, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.271186440677966, "eval_loss": 2.0309665203094482, "eval_runtime": 24.3043, "eval_samples_per_second": 13.784, "eval_steps_per_second": 3.456, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.694915254237288, "eval_loss": 1.1695457696914673, "eval_runtime": 24.5393, "eval_samples_per_second": 13.652, "eval_steps_per_second": 3.423, "eval_wer": 0.9975206611570248, "step": 400 }, { "epoch": 2.1186440677966103, "grad_norm": 5.734997272491455, "learning_rate": 3e-05, "loss": 5.5638, "step": 500 }, { "epoch": 2.1186440677966103, "eval_loss": 1.157854676246643, "eval_runtime": 25.7403, "eval_samples_per_second": 13.015, "eval_steps_per_second": 3.263, "eval_wer": 0.9917355371900827, "step": 500 }, { "epoch": 2.542372881355932, "eval_loss": 1.117336392402649, "eval_runtime": 24.578, "eval_samples_per_second": 13.63, "eval_steps_per_second": 3.418, "eval_wer": 0.9931129476584022, "step": 600 }, { "epoch": 2.9661016949152543, "eval_loss": 0.9185360074043274, "eval_runtime": 24.6366, "eval_samples_per_second": 13.598, "eval_steps_per_second": 3.41, "eval_wer": 0.7611570247933884, "step": 700 }, { "epoch": 3.389830508474576, "eval_loss": 0.8460559248924255, "eval_runtime": 24.6486, "eval_samples_per_second": 13.591, "eval_steps_per_second": 3.408, "eval_wer": 0.8633608815426997, "step": 800 }, { "epoch": 3.8135593220338984, "eval_loss": 0.7551252841949463, "eval_runtime": 24.7997, "eval_samples_per_second": 13.508, "eval_steps_per_second": 3.387, "eval_wer": 0.7391184573002755, "step": 900 }, { "epoch": 4.237288135593221, "grad_norm": 2.491547107696533, "learning_rate": 2.7720364741641338e-05, "loss": 1.2967, "step": 1000 }, { "epoch": 4.237288135593221, "eval_loss": 0.8461477756500244, "eval_runtime": 24.6325, "eval_samples_per_second": 13.6, "eval_steps_per_second": 3.41, "eval_wer": 0.8179063360881542, "step": 1000 }, { "epoch": 4.661016949152542, "eval_loss": 1.0125970840454102, "eval_runtime": 24.7518, "eval_samples_per_second": 13.534, "eval_steps_per_second": 3.394, "eval_wer": 0.931129476584022, "step": 1100 }, { "epoch": 5.084745762711864, "eval_loss": 0.7473347783088684, "eval_runtime": 24.5889, "eval_samples_per_second": 13.624, "eval_steps_per_second": 3.416, "eval_wer": 0.7077134986225895, "step": 1200 }, { "epoch": 5.508474576271187, "eval_loss": 0.7741428017616272, "eval_runtime": 24.7431, "eval_samples_per_second": 13.539, "eval_steps_per_second": 3.395, "eval_wer": 0.7567493112947659, "step": 1300 }, { "epoch": 5.932203389830509, "eval_loss": 0.8527082204818726, "eval_runtime": 24.6476, "eval_samples_per_second": 13.592, "eval_steps_per_second": 3.408, "eval_wer": 0.7520661157024794, "step": 1400 }, { "epoch": 6.3559322033898304, "grad_norm": 7.505873203277588, "learning_rate": 2.5440729483282676e-05, "loss": 0.8911, "step": 1500 }, { "epoch": 6.3559322033898304, "eval_loss": 0.9528570771217346, "eval_runtime": 24.4492, "eval_samples_per_second": 13.702, "eval_steps_per_second": 3.436, "eval_wer": 0.8052341597796143, "step": 1500 }, { "epoch": 6.779661016949152, "eval_loss": 1.0234959125518799, "eval_runtime": 24.4889, "eval_samples_per_second": 13.68, "eval_steps_per_second": 3.43, "eval_wer": 0.8308539944903581, "step": 1600 }, { "epoch": 6.779661016949152, "step": 1600, "total_flos": 9.408039870276794e+18, "train_loss": 2.461955771446228, "train_runtime": 2270.622, "train_samples_per_second": 24.931, "train_steps_per_second": 3.118 } ], "logging_steps": 500, "max_steps": 7080, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.408039870276794e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }