{ "best_metric": Infinity, "best_model_checkpoint": null, "epoch": 2.5052631578947366, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10025062656641603, "grad_norm": 5.436703205108643, "learning_rate": 0.000285, "loss": 14.809, "step": 100 }, { "epoch": 0.10025062656641603, "eval_loss": Infinity, "eval_runtime": 31.2196, "eval_samples_per_second": 15.215, "eval_steps_per_second": 3.812, "eval_wer": 1.0120317820658342, "step": 100 }, { "epoch": 0.20050125313283207, "grad_norm": 4.481522560119629, "learning_rate": 0.00028812499999999997, "loss": 6.0699, "step": 200 }, { "epoch": 0.20050125313283207, "eval_loss": Infinity, "eval_runtime": 31.1416, "eval_samples_per_second": 15.253, "eval_steps_per_second": 3.821, "eval_wer": 0.9993189557321226, "step": 200 }, { "epoch": 0.3007518796992481, "grad_norm": 5.9900593757629395, "learning_rate": 0.00027562499999999994, "loss": 5.3221, "step": 300 }, { "epoch": 0.3007518796992481, "eval_loss": Infinity, "eval_runtime": 31.3083, "eval_samples_per_second": 15.172, "eval_steps_per_second": 3.801, "eval_wer": 1.034733257661748, "step": 300 }, { "epoch": 0.40100250626566414, "grad_norm": 5.1721720695495605, "learning_rate": 0.00026312499999999996, "loss": 4.0785, "step": 400 }, { "epoch": 0.40100250626566414, "eval_loss": Infinity, "eval_runtime": 31.0, "eval_samples_per_second": 15.323, "eval_steps_per_second": 3.839, "eval_wer": 0.7820658342792282, "step": 400 }, { "epoch": 0.5012531328320802, "grad_norm": 3.2461183071136475, "learning_rate": 0.000250625, "loss": 2.0743, "step": 500 }, { "epoch": 0.5012531328320802, "eval_loss": Infinity, "eval_runtime": 31.1476, "eval_samples_per_second": 15.25, "eval_steps_per_second": 3.821, "eval_wer": 0.608172531214529, "step": 500 }, { "epoch": 0.6015037593984962, "grad_norm": 4.59557580947876, "learning_rate": 0.00023812499999999997, "loss": 1.7578, "step": 600 }, { "epoch": 0.6015037593984962, "eval_loss": Infinity, "eval_runtime": 30.9949, "eval_samples_per_second": 15.325, "eval_steps_per_second": 3.839, "eval_wer": 0.5732122587968218, "step": 600 }, { "epoch": 0.7017543859649122, "grad_norm": 3.777493715286255, "learning_rate": 0.00022562499999999997, "loss": 1.8081, "step": 700 }, { "epoch": 0.7017543859649122, "eval_loss": Infinity, "eval_runtime": 30.8383, "eval_samples_per_second": 15.403, "eval_steps_per_second": 3.859, "eval_wer": 0.5659477866061294, "step": 700 }, { "epoch": 0.8020050125313283, "grad_norm": 5.235193252563477, "learning_rate": 0.000213125, "loss": 1.7107, "step": 800 }, { "epoch": 0.8020050125313283, "eval_loss": Infinity, "eval_runtime": 31.0337, "eval_samples_per_second": 15.306, "eval_steps_per_second": 3.835, "eval_wer": 0.5477866061293984, "step": 800 }, { "epoch": 0.9022556390977443, "grad_norm": 3.106978178024292, "learning_rate": 0.00020062499999999996, "loss": 1.7206, "step": 900 }, { "epoch": 0.9022556390977443, "eval_loss": Infinity, "eval_runtime": 31.3113, "eval_samples_per_second": 15.17, "eval_steps_per_second": 3.801, "eval_wer": 0.548921679909194, "step": 900 }, { "epoch": 1.0020050125313282, "grad_norm": 1.7041335105895996, "learning_rate": 0.00018812499999999998, "loss": 1.6957, "step": 1000 }, { "epoch": 1.0020050125313282, "eval_loss": Infinity, "eval_runtime": 30.8808, "eval_samples_per_second": 15.382, "eval_steps_per_second": 3.854, "eval_wer": 0.5446083995459705, "step": 1000 }, { "epoch": 1.1022556390977443, "grad_norm": 2.6990814208984375, "learning_rate": 0.000175625, "loss": 1.587, "step": 1100 }, { "epoch": 1.1022556390977443, "eval_loss": Infinity, "eval_runtime": 30.9243, "eval_samples_per_second": 15.36, "eval_steps_per_second": 3.848, "eval_wer": 0.5380249716231555, "step": 1100 }, { "epoch": 1.2025062656641605, "grad_norm": 3.27028751373291, "learning_rate": 0.00016312499999999997, "loss": 1.5794, "step": 1200 }, { "epoch": 1.2025062656641605, "eval_loss": Infinity, "eval_runtime": 31.0262, "eval_samples_per_second": 15.31, "eval_steps_per_second": 3.835, "eval_wer": 0.535527809307605, "step": 1200 }, { "epoch": 1.3027568922305766, "grad_norm": 3.1246092319488525, "learning_rate": 0.000150625, "loss": 1.4728, "step": 1300 }, { "epoch": 1.3027568922305766, "eval_loss": Infinity, "eval_runtime": 31.4537, "eval_samples_per_second": 15.102, "eval_steps_per_second": 3.783, "eval_wer": 0.5250851305334847, "step": 1300 }, { "epoch": 1.4030075187969926, "grad_norm": 9.50688362121582, "learning_rate": 0.00013812499999999998, "loss": 1.5137, "step": 1400 }, { "epoch": 1.4030075187969926, "eval_loss": Infinity, "eval_runtime": 31.1621, "eval_samples_per_second": 15.243, "eval_steps_per_second": 3.819, "eval_wer": 0.5380249716231555, "step": 1400 }, { "epoch": 1.5032581453634086, "grad_norm": 11.858753204345703, "learning_rate": 0.000125625, "loss": 1.5073, "step": 1500 }, { "epoch": 1.5032581453634086, "eval_loss": Infinity, "eval_runtime": 31.1677, "eval_samples_per_second": 15.24, "eval_steps_per_second": 3.818, "eval_wer": 0.5293984108967082, "step": 1500 }, { "epoch": 1.6035087719298247, "grad_norm": 8.673787117004395, "learning_rate": 0.00011312499999999999, "loss": 1.3676, "step": 1600 }, { "epoch": 1.6035087719298247, "eval_loss": Infinity, "eval_runtime": 31.1597, "eval_samples_per_second": 15.244, "eval_steps_per_second": 3.819, "eval_wer": 0.5271282633371169, "step": 1600 }, { "epoch": 1.7037593984962407, "grad_norm": 8.988738059997559, "learning_rate": 0.00010062499999999998, "loss": 1.5592, "step": 1700 }, { "epoch": 1.7037593984962407, "eval_loss": Infinity, "eval_runtime": 31.5409, "eval_samples_per_second": 15.06, "eval_steps_per_second": 3.773, "eval_wer": 0.523950056753689, "step": 1700 }, { "epoch": 1.8040100250626567, "grad_norm": 3.7104170322418213, "learning_rate": 8.8125e-05, "loss": 1.5091, "step": 1800 }, { "epoch": 1.8040100250626567, "eval_loss": Infinity, "eval_runtime": 31.3144, "eval_samples_per_second": 15.169, "eval_steps_per_second": 3.8, "eval_wer": 0.5682179341657208, "step": 1800 }, { "epoch": 1.9042606516290728, "grad_norm": 10.594830513000488, "learning_rate": 7.5625e-05, "loss": 1.5439, "step": 1900 }, { "epoch": 1.9042606516290728, "eval_loss": Infinity, "eval_runtime": 31.2335, "eval_samples_per_second": 15.208, "eval_steps_per_second": 3.81, "eval_wer": 0.5207718501702611, "step": 1900 }, { "epoch": 2.0040100250626565, "grad_norm": 2.99572491645813, "learning_rate": 6.312499999999999e-05, "loss": 1.4025, "step": 2000 }, { "epoch": 2.0040100250626565, "eval_loss": Infinity, "eval_runtime": 31.1091, "eval_samples_per_second": 15.269, "eval_steps_per_second": 3.825, "eval_wer": 0.5275822928490352, "step": 2000 }, { "epoch": 2.1042606516290725, "grad_norm": 5.28792142868042, "learning_rate": 5.0625e-05, "loss": 1.465, "step": 2100 }, { "epoch": 2.1042606516290725, "eval_loss": Infinity, "eval_runtime": 31.511, "eval_samples_per_second": 15.074, "eval_steps_per_second": 3.776, "eval_wer": 0.5269012485811577, "step": 2100 }, { "epoch": 2.2045112781954885, "grad_norm": 1.9892240762710571, "learning_rate": 3.812499999999999e-05, "loss": 1.4096, "step": 2200 }, { "epoch": 2.2045112781954885, "eval_loss": Infinity, "eval_runtime": 31.3102, "eval_samples_per_second": 15.171, "eval_steps_per_second": 3.801, "eval_wer": 0.5346197502837684, "step": 2200 }, { "epoch": 2.3047619047619046, "grad_norm": 6.07113790512085, "learning_rate": 2.5625e-05, "loss": 1.428, "step": 2300 }, { "epoch": 2.3047619047619046, "eval_loss": Infinity, "eval_runtime": 31.1219, "eval_samples_per_second": 15.263, "eval_steps_per_second": 3.824, "eval_wer": 0.5212258796821794, "step": 2300 }, { "epoch": 2.405012531328321, "grad_norm": 18.516212463378906, "learning_rate": 1.3124999999999999e-05, "loss": 1.3829, "step": 2400 }, { "epoch": 2.405012531328321, "eval_loss": Infinity, "eval_runtime": 31.2686, "eval_samples_per_second": 15.191, "eval_steps_per_second": 3.806, "eval_wer": 0.5216799091940976, "step": 2400 }, { "epoch": 2.5052631578947366, "grad_norm": 1.5258479118347168, "learning_rate": 6.249999999999999e-07, "loss": 1.3048, "step": 2500 }, { "epoch": 2.5052631578947366, "eval_loss": Infinity, "eval_runtime": 31.5606, "eval_samples_per_second": 15.05, "eval_steps_per_second": 3.771, "eval_wer": 0.520544835414302, "step": 2500 }, { "epoch": 2.5052631578947366, "step": 2500, "total_flos": 1.25958553108086e+19, "train_loss": 2.523174108886719, "train_runtime": 3540.0212, "train_samples_per_second": 5.65, "train_steps_per_second": 0.706 } ], "logging_steps": 100, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.25958553108086e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }