{ "best_metric": 2.124704599380493, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaHC3_human/checkpoint-140", "epoch": 2.5339366515837103, "global_step": 140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "eval_loss": 2.3469691276550293, "eval_runtime": 40.7893, "eval_samples_per_second": 49.032, "eval_steps_per_second": 0.785, "step": 5 }, { "epoch": 0.18, "eval_loss": 2.254398822784424, "eval_runtime": 40.7321, "eval_samples_per_second": 49.101, "eval_steps_per_second": 0.786, "step": 10 }, { "epoch": 0.27, "eval_loss": 2.216655731201172, "eval_runtime": 40.7948, "eval_samples_per_second": 49.026, "eval_steps_per_second": 0.784, "step": 15 }, { "epoch": 0.36, "learning_rate": 0.000271875, "loss": 2.265, "step": 20 }, { "epoch": 0.36, "eval_loss": 2.192472219467163, "eval_runtime": 40.9073, "eval_samples_per_second": 48.891, "eval_steps_per_second": 0.782, "step": 20 }, { "epoch": 0.45, "eval_loss": 2.1772661209106445, "eval_runtime": 40.8472, "eval_samples_per_second": 48.963, "eval_steps_per_second": 0.783, "step": 25 }, { "epoch": 0.54, "eval_loss": 2.1680707931518555, "eval_runtime": 40.7376, "eval_samples_per_second": 49.095, "eval_steps_per_second": 0.786, "step": 30 }, { "epoch": 0.63, "eval_loss": 2.15969181060791, "eval_runtime": 40.7043, "eval_samples_per_second": 49.135, "eval_steps_per_second": 0.786, "step": 35 }, { "epoch": 0.72, "learning_rate": 0.00023624999999999997, "loss": 2.1528, "step": 40 }, { "epoch": 0.72, "eval_loss": 2.1535723209381104, "eval_runtime": 40.7472, "eval_samples_per_second": 49.083, "eval_steps_per_second": 0.785, "step": 40 }, { "epoch": 0.81, "eval_loss": 2.149122476577759, "eval_runtime": 40.7594, "eval_samples_per_second": 49.068, "eval_steps_per_second": 0.785, "step": 45 }, { "epoch": 0.9, "eval_loss": 2.1453487873077393, "eval_runtime": 40.7319, "eval_samples_per_second": 49.102, "eval_steps_per_second": 0.786, "step": 50 }, { "epoch": 1.0, "eval_loss": 2.142251968383789, "eval_runtime": 40.8482, "eval_samples_per_second": 48.962, "eval_steps_per_second": 0.783, "step": 55 }, { "epoch": 1.09, "learning_rate": 0.00019874999999999998, "loss": 2.1332, "step": 60 }, { "epoch": 1.09, "eval_loss": 2.139620065689087, "eval_runtime": 40.7619, "eval_samples_per_second": 49.065, "eval_steps_per_second": 0.785, "step": 60 }, { "epoch": 1.18, "eval_loss": 2.137423276901245, "eval_runtime": 40.783, "eval_samples_per_second": 49.04, "eval_steps_per_second": 0.785, "step": 65 }, { "epoch": 1.27, "eval_loss": 2.135718822479248, "eval_runtime": 40.7802, "eval_samples_per_second": 49.043, "eval_steps_per_second": 0.785, "step": 70 }, { "epoch": 1.36, "eval_loss": 2.134214401245117, "eval_runtime": 40.7213, "eval_samples_per_second": 49.114, "eval_steps_per_second": 0.786, "step": 75 }, { "epoch": 1.45, "learning_rate": 0.00016125, "loss": 2.1152, "step": 80 }, { "epoch": 1.45, "eval_loss": 2.1330068111419678, "eval_runtime": 40.8447, "eval_samples_per_second": 48.966, "eval_steps_per_second": 0.783, "step": 80 }, { "epoch": 1.54, "eval_loss": 2.131664276123047, "eval_runtime": 40.9633, "eval_samples_per_second": 48.824, "eval_steps_per_second": 0.781, "step": 85 }, { "epoch": 1.63, "eval_loss": 2.1305532455444336, "eval_runtime": 40.7803, "eval_samples_per_second": 49.043, "eval_steps_per_second": 0.785, "step": 90 }, { "epoch": 1.72, "eval_loss": 2.1295671463012695, "eval_runtime": 40.8088, "eval_samples_per_second": 49.009, "eval_steps_per_second": 0.784, "step": 95 }, { "epoch": 1.81, "learning_rate": 0.00012374999999999997, "loss": 2.1138, "step": 100 }, { "epoch": 1.81, "eval_loss": 2.1289596557617188, "eval_runtime": 40.7573, "eval_samples_per_second": 49.071, "eval_steps_per_second": 0.785, "step": 100 }, { "epoch": 1.9, "eval_loss": 2.1280689239501953, "eval_runtime": 40.8322, "eval_samples_per_second": 48.981, "eval_steps_per_second": 0.784, "step": 105 }, { "epoch": 1.99, "eval_loss": 2.127443552017212, "eval_runtime": 40.9036, "eval_samples_per_second": 48.895, "eval_steps_per_second": 0.782, "step": 110 }, { "epoch": 2.08, "eval_loss": 2.126908779144287, "eval_runtime": 41.0214, "eval_samples_per_second": 48.755, "eval_steps_per_second": 0.78, "step": 115 }, { "epoch": 2.17, "learning_rate": 8.624999999999998e-05, "loss": 2.1101, "step": 120 }, { "epoch": 2.17, "eval_loss": 2.126199245452881, "eval_runtime": 40.9707, "eval_samples_per_second": 48.815, "eval_steps_per_second": 0.781, "step": 120 }, { "epoch": 2.26, "eval_loss": 2.125767230987549, "eval_runtime": 40.7834, "eval_samples_per_second": 49.04, "eval_steps_per_second": 0.785, "step": 125 }, { "epoch": 2.35, "eval_loss": 2.125452995300293, "eval_runtime": 40.8009, "eval_samples_per_second": 49.019, "eval_steps_per_second": 0.784, "step": 130 }, { "epoch": 2.44, "eval_loss": 2.1250967979431152, "eval_runtime": 40.7689, "eval_samples_per_second": 49.057, "eval_steps_per_second": 0.785, "step": 135 }, { "epoch": 2.53, "learning_rate": 4.875e-05, "loss": 2.108, "step": 140 }, { "epoch": 2.53, "eval_loss": 2.124704599380493, "eval_runtime": 40.7682, "eval_samples_per_second": 49.058, "eval_steps_per_second": 0.785, "step": 140 } ], "max_steps": 165, "num_train_epochs": 3, "total_flos": 2.0586863673863045e+18, "trial_name": null, "trial_params": null }