{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 3e-05, "loss": 2.3698, "step": 100 }, { "epoch": 0.8, "learning_rate": 3e-05, "loss": 1.8255, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.6054430379746836, "eval_loss": 1.83919358253479, "eval_runtime": 4.6476, "eval_samples_per_second": 107.584, "eval_steps_per_second": 13.556, "step": 250 }, { "epoch": 1.2, "learning_rate": 3e-05, "loss": 1.7843, "step": 300 }, { "epoch": 1.6, "learning_rate": 3e-05, "loss": 1.7546, "step": 400 }, { "epoch": 2.0, "learning_rate": 3e-05, "loss": 1.7368, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6078227848101266, "eval_loss": 1.81111478805542, "eval_runtime": 4.7063, "eval_samples_per_second": 106.242, "eval_steps_per_second": 13.386, "step": 500 }, { "epoch": 2.4, "learning_rate": 3e-05, "loss": 1.6749, "step": 600 }, { "epoch": 2.8, "learning_rate": 3e-05, "loss": 1.6689, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.607493670886076, "eval_loss": 1.8103100061416626, "eval_runtime": 5.2341, "eval_samples_per_second": 95.528, "eval_steps_per_second": 12.037, "step": 750 }, { "epoch": 3.2, "learning_rate": 3e-05, "loss": 1.6205, "step": 800 }, { "epoch": 3.6, "learning_rate": 3e-05, "loss": 1.556, "step": 900 }, { "epoch": 4.0, "learning_rate": 3e-05, "loss": 1.5555, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.6067341772151899, "eval_loss": 1.8414338827133179, "eval_runtime": 4.8146, "eval_samples_per_second": 103.85, "eval_steps_per_second": 13.085, "step": 1000 }, { "epoch": 4.4, "learning_rate": 3e-05, "loss": 1.4289, "step": 1100 }, { "epoch": 4.8, "learning_rate": 3e-05, "loss": 1.4559, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.6037721518987341, "eval_loss": 1.8992472887039185, "eval_runtime": 5.1125, "eval_samples_per_second": 97.799, "eval_steps_per_second": 12.323, "step": 1250 }, { "epoch": 5.2, "learning_rate": 3e-05, "loss": 1.3828, "step": 1300 }, { "epoch": 5.6, "learning_rate": 3e-05, "loss": 1.3271, "step": 1400 }, { "epoch": 6.0, "learning_rate": 3e-05, "loss": 1.3514, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.6018227848101266, "eval_loss": 1.9584064483642578, "eval_runtime": 4.4025, "eval_samples_per_second": 113.572, "eval_steps_per_second": 14.31, "step": 1500 }, { "epoch": 6.4, "learning_rate": 3e-05, "loss": 1.2239, "step": 1600 }, { "epoch": 6.8, "learning_rate": 3e-05, "loss": 1.2491, "step": 1700 }, { "epoch": 7.0, "eval_accuracy": 0.5999746835443038, "eval_loss": 2.030003070831299, "eval_runtime": 4.7046, "eval_samples_per_second": 106.279, "eval_steps_per_second": 13.391, "step": 1750 }, { "epoch": 7.2, "learning_rate": 3e-05, "loss": 1.1873, "step": 1800 }, { "epoch": 7.6, "learning_rate": 3e-05, "loss": 1.1455, "step": 1900 }, { "epoch": 8.0, "learning_rate": 3e-05, "loss": 1.1749, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5981518987341772, "eval_loss": 2.1050591468811035, "eval_runtime": 4.5572, "eval_samples_per_second": 109.717, "eval_steps_per_second": 13.824, "step": 2000 }, { "epoch": 8.4, "learning_rate": 3e-05, "loss": 1.0398, "step": 2100 }, { "epoch": 8.8, "learning_rate": 3e-05, "loss": 1.0769, "step": 2200 }, { "epoch": 9.0, "eval_accuracy": 0.5953924050632912, "eval_loss": 2.194838762283325, "eval_runtime": 5.1306, "eval_samples_per_second": 97.455, "eval_steps_per_second": 12.279, "step": 2250 }, { "epoch": 9.2, "learning_rate": 3e-05, "loss": 1.0208, "step": 2300 }, { "epoch": 9.6, "learning_rate": 3e-05, "loss": 0.9809, "step": 2400 }, { "epoch": 10.0, "learning_rate": 3e-05, "loss": 1.0134, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.594253164556962, "eval_loss": 2.2515170574188232, "eval_runtime": 4.7037, "eval_samples_per_second": 106.3, "eval_steps_per_second": 13.394, "step": 2500 }, { "epoch": 10.4, "learning_rate": 3e-05, "loss": 0.8808, "step": 2600 }, { "epoch": 10.8, "learning_rate": 3e-05, "loss": 0.9209, "step": 2700 }, { "epoch": 11.0, "eval_accuracy": 0.592126582278481, "eval_loss": 2.3421294689178467, "eval_runtime": 4.5581, "eval_samples_per_second": 109.695, "eval_steps_per_second": 13.822, "step": 2750 }, { "epoch": 11.2, "learning_rate": 3e-05, "loss": 0.881, "step": 2800 }, { "epoch": 11.6, "learning_rate": 3e-05, "loss": 0.8321, "step": 2900 }, { "epoch": 12.0, "learning_rate": 3e-05, "loss": 0.8636, "step": 3000 }, { "epoch": 12.0, "eval_accuracy": 0.5905063291139241, "eval_loss": 2.4442591667175293, "eval_runtime": 5.1497, "eval_samples_per_second": 97.093, "eval_steps_per_second": 12.234, "step": 3000 }, { "epoch": 12.4, "learning_rate": 3e-05, "loss": 0.7437, "step": 3100 }, { "epoch": 12.8, "learning_rate": 3e-05, "loss": 0.7866, "step": 3200 }, { "epoch": 13.0, "eval_accuracy": 0.588, "eval_loss": 2.557358741760254, "eval_runtime": 4.558, "eval_samples_per_second": 109.698, "eval_steps_per_second": 13.822, "step": 3250 }, { "epoch": 13.2, "learning_rate": 3e-05, "loss": 0.7408, "step": 3300 }, { "epoch": 13.6, "learning_rate": 3e-05, "loss": 0.7067, "step": 3400 }, { "epoch": 14.0, "learning_rate": 3e-05, "loss": 0.7448, "step": 3500 }, { "epoch": 14.0, "eval_accuracy": 0.5866835443037974, "eval_loss": 2.579989194869995, "eval_runtime": 4.4082, "eval_samples_per_second": 113.424, "eval_steps_per_second": 14.291, "step": 3500 }, { "epoch": 14.4, "learning_rate": 3e-05, "loss": 0.637, "step": 3600 }, { "epoch": 14.8, "learning_rate": 3e-05, "loss": 0.6709, "step": 3700 }, { "epoch": 15.0, "eval_accuracy": 0.5845569620253165, "eval_loss": 2.6911704540252686, "eval_runtime": 5.1406, "eval_samples_per_second": 97.265, "eval_steps_per_second": 12.255, "step": 3750 }, { "epoch": 15.2, "learning_rate": 3e-05, "loss": 0.6375, "step": 3800 }, { "epoch": 15.6, "learning_rate": 3e-05, "loss": 0.6088, "step": 3900 }, { "epoch": 16.0, "learning_rate": 3e-05, "loss": 0.6439, "step": 4000 }, { "epoch": 16.0, "eval_accuracy": 0.5853164556962025, "eval_loss": 2.7545602321624756, "eval_runtime": 4.7065, "eval_samples_per_second": 106.237, "eval_steps_per_second": 13.386, "step": 4000 }, { "epoch": 16.4, "learning_rate": 3e-05, "loss": 0.5552, "step": 4100 }, { "epoch": 16.8, "learning_rate": 3e-05, "loss": 0.5869, "step": 4200 }, { "epoch": 17.0, "eval_accuracy": 0.5831139240506329, "eval_loss": 2.799652338027954, "eval_runtime": 5.213, "eval_samples_per_second": 95.914, "eval_steps_per_second": 12.085, "step": 4250 }, { "epoch": 17.2, "learning_rate": 3e-05, "loss": 0.5547, "step": 4300 }, { "epoch": 17.6, "learning_rate": 3e-05, "loss": 0.5336, "step": 4400 }, { "epoch": 18.0, "learning_rate": 3e-05, "loss": 0.5596, "step": 4500 }, { "epoch": 18.0, "eval_accuracy": 0.5832911392405064, "eval_loss": 2.843494176864624, "eval_runtime": 4.6373, "eval_samples_per_second": 107.822, "eval_steps_per_second": 13.586, "step": 4500 }, { "epoch": 18.4, "learning_rate": 3e-05, "loss": 0.4871, "step": 4600 }, { "epoch": 18.8, "learning_rate": 3e-05, "loss": 0.5205, "step": 4700 }, { "epoch": 19.0, "eval_accuracy": 0.5832911392405064, "eval_loss": 2.9509618282318115, "eval_runtime": 4.4063, "eval_samples_per_second": 113.473, "eval_steps_per_second": 14.298, "step": 4750 }, { "epoch": 19.2, "learning_rate": 3e-05, "loss": 0.4924, "step": 4800 }, { "epoch": 19.6, "learning_rate": 3e-05, "loss": 0.4789, "step": 4900 }, { "epoch": 20.0, "learning_rate": 3e-05, "loss": 0.5045, "step": 5000 }, { "epoch": 20.0, "eval_accuracy": 0.5824050632911393, "eval_loss": 2.9796953201293945, "eval_runtime": 5.1055, "eval_samples_per_second": 97.933, "eval_steps_per_second": 12.34, "step": 5000 }, { "epoch": 20.0, "step": 5000, "total_flos": 3.1967425075347456e+17, "train_loss": 1.043557526397705, "train_runtime": 3605.2187, "train_samples_per_second": 44.38, "train_steps_per_second": 1.387 } ], "logging_steps": 100, "max_steps": 5000, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.1967425075347456e+17, "trial_name": null, "trial_params": null }