{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.901960784313726, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33, "learning_rate": 4.673202614379085e-05, "loss": 0.0803, "step": 200 }, { "epoch": 0.33, "eval_accuracy": 0.9874985395490128, "eval_loss": 0.03710582107305527, "eval_runtime": 80.6999, "eval_samples_per_second": 106.06, "eval_steps_per_second": 2.528, "step": 200 }, { "epoch": 0.65, "learning_rate": 4.3464052287581704e-05, "loss": 0.0393, "step": 400 }, { "epoch": 0.65, "eval_accuracy": 0.9884332281808622, "eval_loss": 0.03642109036445618, "eval_runtime": 80.6008, "eval_samples_per_second": 106.19, "eval_steps_per_second": 2.531, "step": 400 }, { "epoch": 0.98, "learning_rate": 4.0196078431372555e-05, "loss": 0.0372, "step": 600 }, { "epoch": 0.98, "eval_accuracy": 0.986447014838182, "eval_loss": 0.04179869592189789, "eval_runtime": 80.6457, "eval_samples_per_second": 106.131, "eval_steps_per_second": 2.53, "step": 600 }, { "epoch": 1.31, "learning_rate": 3.6928104575163405e-05, "loss": 0.0114, "step": 800 }, { "epoch": 1.31, "eval_accuracy": 0.9896015889706742, "eval_loss": 0.0394107885658741, "eval_runtime": 80.6065, "eval_samples_per_second": 106.182, "eval_steps_per_second": 2.531, "step": 800 }, { "epoch": 1.63, "learning_rate": 3.366013071895425e-05, "loss": 0.0138, "step": 1000 }, { "epoch": 1.63, "eval_accuracy": 0.9897184250496553, "eval_loss": 0.0397811159491539, "eval_runtime": 80.5677, "eval_samples_per_second": 106.234, "eval_steps_per_second": 2.532, "step": 1000 }, { "epoch": 1.96, "learning_rate": 3.0392156862745097e-05, "loss": 0.0127, "step": 1200 }, { "epoch": 1.96, "eval_accuracy": 0.9908867858394672, "eval_loss": 0.034337081015110016, "eval_runtime": 80.5302, "eval_samples_per_second": 106.283, "eval_steps_per_second": 2.533, "step": 1200 }, { "epoch": 2.29, "learning_rate": 2.7124183006535947e-05, "loss": 0.0052, "step": 1400 }, { "epoch": 2.29, "eval_accuracy": 0.9911204579974296, "eval_loss": 0.037403274327516556, "eval_runtime": 80.6258, "eval_samples_per_second": 106.157, "eval_steps_per_second": 2.53, "step": 1400 }, { "epoch": 2.61, "learning_rate": 2.38562091503268e-05, "loss": 0.0018, "step": 1600 }, { "epoch": 2.61, "eval_accuracy": 0.9906531136815049, "eval_loss": 0.04766124114394188, "eval_runtime": 80.6271, "eval_samples_per_second": 106.155, "eval_steps_per_second": 2.53, "step": 1600 }, { "epoch": 2.94, "learning_rate": 2.058823529411765e-05, "loss": 0.0009, "step": 1800 }, { "epoch": 2.94, "eval_accuracy": 0.9912372940764108, "eval_loss": 0.0513538159430027, "eval_runtime": 80.6405, "eval_samples_per_second": 106.138, "eval_steps_per_second": 2.53, "step": 1800 }, { "epoch": 3.27, "learning_rate": 1.7320261437908496e-05, "loss": 0.001, "step": 2000 }, { "epoch": 3.27, "eval_accuracy": 0.9904194415235424, "eval_loss": 0.046223659068346024, "eval_runtime": 80.5163, "eval_samples_per_second": 106.301, "eval_steps_per_second": 2.534, "step": 2000 }, { "epoch": 3.59, "learning_rate": 1.4052287581699347e-05, "loss": 0.0002, "step": 2200 }, { "epoch": 3.59, "eval_accuracy": 0.9911204579974296, "eval_loss": 0.04658184573054314, "eval_runtime": 80.6281, "eval_samples_per_second": 106.154, "eval_steps_per_second": 2.53, "step": 2200 }, { "epoch": 3.92, "learning_rate": 1.0784313725490197e-05, "loss": 0.0006, "step": 2400 }, { "epoch": 3.92, "eval_accuracy": 0.9905362776025236, "eval_loss": 0.046631619334220886, "eval_runtime": 80.5126, "eval_samples_per_second": 106.306, "eval_steps_per_second": 2.534, "step": 2400 }, { "epoch": 4.25, "learning_rate": 7.5163398692810456e-06, "loss": 0.0001, "step": 2600 }, { "epoch": 4.25, "eval_accuracy": 0.990769949760486, "eval_loss": 0.05143677070736885, "eval_runtime": 80.6039, "eval_samples_per_second": 106.186, "eval_steps_per_second": 2.531, "step": 2600 }, { "epoch": 4.58, "learning_rate": 4.2483660130718954e-06, "loss": 0.0005, "step": 2800 }, { "epoch": 4.58, "eval_accuracy": 0.9908867858394672, "eval_loss": 0.04908544197678566, "eval_runtime": 80.4782, "eval_samples_per_second": 106.352, "eval_steps_per_second": 2.535, "step": 2800 }, { "epoch": 4.9, "learning_rate": 9.80392156862745e-07, "loss": 0.0004, "step": 3000 }, { "epoch": 4.9, "eval_accuracy": 0.9910036219184484, "eval_loss": 0.049937766045331955, "eval_runtime": 80.6026, "eval_samples_per_second": 106.188, "eval_steps_per_second": 2.531, "step": 3000 } ], "max_steps": 3060, "num_train_epochs": 5, "total_flos": 0.0, "trial_name": null, "trial_params": null }