{
  "best_metric": 2.123983860015869,
  "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaHC3_human/checkpoint-160",
  "epoch": 2.8959276018099547,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "eval_loss": 2.3469691276550293,
      "eval_runtime": 40.7893,
      "eval_samples_per_second": 49.032,
      "eval_steps_per_second": 0.785,
      "step": 5
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.254398822784424,
      "eval_runtime": 40.7321,
      "eval_samples_per_second": 49.101,
      "eval_steps_per_second": 0.786,
      "step": 10
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.216655731201172,
      "eval_runtime": 40.7948,
      "eval_samples_per_second": 49.026,
      "eval_steps_per_second": 0.784,
      "step": 15
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.000271875,
      "loss": 2.265,
      "step": 20
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.192472219467163,
      "eval_runtime": 40.9073,
      "eval_samples_per_second": 48.891,
      "eval_steps_per_second": 0.782,
      "step": 20
    },
    {
      "epoch": 0.45,
      "eval_loss": 2.1772661209106445,
      "eval_runtime": 40.8472,
      "eval_samples_per_second": 48.963,
      "eval_steps_per_second": 0.783,
      "step": 25
    },
    {
      "epoch": 0.54,
      "eval_loss": 2.1680707931518555,
      "eval_runtime": 40.7376,
      "eval_samples_per_second": 49.095,
      "eval_steps_per_second": 0.786,
      "step": 30
    },
    {
      "epoch": 0.63,
      "eval_loss": 2.15969181060791,
      "eval_runtime": 40.7043,
      "eval_samples_per_second": 49.135,
      "eval_steps_per_second": 0.786,
      "step": 35
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00023624999999999997,
      "loss": 2.1528,
      "step": 40
    },
    {
      "epoch": 0.72,
      "eval_loss": 2.1535723209381104,
      "eval_runtime": 40.7472,
      "eval_samples_per_second": 49.083,
      "eval_steps_per_second": 0.785,
      "step": 40
    },
    {
      "epoch": 0.81,
      "eval_loss": 2.149122476577759,
      "eval_runtime": 40.7594,
      "eval_samples_per_second": 49.068,
      "eval_steps_per_second": 0.785,
      "step": 45
    },
    {
      "epoch": 0.9,
      "eval_loss": 2.1453487873077393,
      "eval_runtime": 40.7319,
      "eval_samples_per_second": 49.102,
      "eval_steps_per_second": 0.786,
      "step": 50
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.142251968383789,
      "eval_runtime": 40.8482,
      "eval_samples_per_second": 48.962,
      "eval_steps_per_second": 0.783,
      "step": 55
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.00019874999999999998,
      "loss": 2.1332,
      "step": 60
    },
    {
      "epoch": 1.09,
      "eval_loss": 2.139620065689087,
      "eval_runtime": 40.7619,
      "eval_samples_per_second": 49.065,
      "eval_steps_per_second": 0.785,
      "step": 60
    },
    {
      "epoch": 1.18,
      "eval_loss": 2.137423276901245,
      "eval_runtime": 40.783,
      "eval_samples_per_second": 49.04,
      "eval_steps_per_second": 0.785,
      "step": 65
    },
    {
      "epoch": 1.27,
      "eval_loss": 2.135718822479248,
      "eval_runtime": 40.7802,
      "eval_samples_per_second": 49.043,
      "eval_steps_per_second": 0.785,
      "step": 70
    },
    {
      "epoch": 1.36,
      "eval_loss": 2.134214401245117,
      "eval_runtime": 40.7213,
      "eval_samples_per_second": 49.114,
      "eval_steps_per_second": 0.786,
      "step": 75
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.00016125,
      "loss": 2.1152,
      "step": 80
    },
    {
      "epoch": 1.45,
      "eval_loss": 2.1330068111419678,
      "eval_runtime": 40.8447,
      "eval_samples_per_second": 48.966,
      "eval_steps_per_second": 0.783,
      "step": 80
    },
    {
      "epoch": 1.54,
      "eval_loss": 2.131664276123047,
      "eval_runtime": 40.9633,
      "eval_samples_per_second": 48.824,
      "eval_steps_per_second": 0.781,
      "step": 85
    },
    {
      "epoch": 1.63,
      "eval_loss": 2.1305532455444336,
      "eval_runtime": 40.7803,
      "eval_samples_per_second": 49.043,
      "eval_steps_per_second": 0.785,
      "step": 90
    },
    {
      "epoch": 1.72,
      "eval_loss": 2.1295671463012695,
      "eval_runtime": 40.8088,
      "eval_samples_per_second": 49.009,
      "eval_steps_per_second": 0.784,
      "step": 95
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00012374999999999997,
      "loss": 2.1138,
      "step": 100
    },
    {
      "epoch": 1.81,
      "eval_loss": 2.1289596557617188,
      "eval_runtime": 40.7573,
      "eval_samples_per_second": 49.071,
      "eval_steps_per_second": 0.785,
      "step": 100
    },
    {
      "epoch": 1.9,
      "eval_loss": 2.1280689239501953,
      "eval_runtime": 40.8322,
      "eval_samples_per_second": 48.981,
      "eval_steps_per_second": 0.784,
      "step": 105
    },
    {
      "epoch": 1.99,
      "eval_loss": 2.127443552017212,
      "eval_runtime": 40.9036,
      "eval_samples_per_second": 48.895,
      "eval_steps_per_second": 0.782,
      "step": 110
    },
    {
      "epoch": 2.08,
      "eval_loss": 2.126908779144287,
      "eval_runtime": 41.0214,
      "eval_samples_per_second": 48.755,
      "eval_steps_per_second": 0.78,
      "step": 115
    },
    {
      "epoch": 2.17,
      "learning_rate": 8.624999999999998e-05,
      "loss": 2.1101,
      "step": 120
    },
    {
      "epoch": 2.17,
      "eval_loss": 2.126199245452881,
      "eval_runtime": 40.9707,
      "eval_samples_per_second": 48.815,
      "eval_steps_per_second": 0.781,
      "step": 120
    },
    {
      "epoch": 2.26,
      "eval_loss": 2.125767230987549,
      "eval_runtime": 40.7834,
      "eval_samples_per_second": 49.04,
      "eval_steps_per_second": 0.785,
      "step": 125
    },
    {
      "epoch": 2.35,
      "eval_loss": 2.125452995300293,
      "eval_runtime": 40.8009,
      "eval_samples_per_second": 49.019,
      "eval_steps_per_second": 0.784,
      "step": 130
    },
    {
      "epoch": 2.44,
      "eval_loss": 2.1250967979431152,
      "eval_runtime": 40.7689,
      "eval_samples_per_second": 49.057,
      "eval_steps_per_second": 0.785,
      "step": 135
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.875e-05,
      "loss": 2.108,
      "step": 140
    },
    {
      "epoch": 2.53,
      "eval_loss": 2.124704599380493,
      "eval_runtime": 40.7682,
      "eval_samples_per_second": 49.058,
      "eval_steps_per_second": 0.785,
      "step": 140
    },
    {
      "epoch": 2.62,
      "eval_loss": 2.124401807785034,
      "eval_runtime": 40.7695,
      "eval_samples_per_second": 49.056,
      "eval_steps_per_second": 0.785,
      "step": 145
    },
    {
      "epoch": 2.71,
      "eval_loss": 2.1242740154266357,
      "eval_runtime": 40.8982,
      "eval_samples_per_second": 48.902,
      "eval_steps_per_second": 0.782,
      "step": 150
    },
    {
      "epoch": 2.81,
      "eval_loss": 2.124105930328369,
      "eval_runtime": 40.7925,
      "eval_samples_per_second": 49.029,
      "eval_steps_per_second": 0.784,
      "step": 155
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.1249999999999999e-05,
      "loss": 2.0996,
      "step": 160
    },
    {
      "epoch": 2.9,
      "eval_loss": 2.123983860015869,
      "eval_runtime": 40.7694,
      "eval_samples_per_second": 49.056,
      "eval_steps_per_second": 0.785,
      "step": 160
    }
  ],
  "max_steps": 165,
  "num_train_epochs": 3,
  "total_flos": 2.354051216792617e+18,
  "trial_name": null,
  "trial_params": null
}