|
{ |
|
"best_metric": 1.6984472274780273, |
|
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_llamaprosocial_dialog/checkpoint-341", |
|
"epoch": 2.9531799729364003, |
|
"global_step": 341, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.9915313720703125, |
|
"eval_runtime": 22.7483, |
|
"eval_samples_per_second": 87.919, |
|
"eval_steps_per_second": 1.407, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000292814371257485, |
|
"loss": 2.0439, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.8029985427856445, |
|
"eval_runtime": 22.7395, |
|
"eval_samples_per_second": 87.953, |
|
"eval_steps_per_second": 1.407, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.7677525281906128, |
|
"eval_runtime": 22.7246, |
|
"eval_samples_per_second": 88.01, |
|
"eval_steps_per_second": 1.408, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00027485029940119756, |
|
"loss": 1.7686, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.7513495683670044, |
|
"eval_runtime": 22.772, |
|
"eval_samples_per_second": 87.827, |
|
"eval_steps_per_second": 1.405, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.7416645288467407, |
|
"eval_runtime": 22.7234, |
|
"eval_samples_per_second": 88.015, |
|
"eval_steps_per_second": 1.408, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00025688622754491017, |
|
"loss": 1.737, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.734699010848999, |
|
"eval_runtime": 22.7372, |
|
"eval_samples_per_second": 87.962, |
|
"eval_steps_per_second": 1.407, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.72977614402771, |
|
"eval_runtime": 22.74, |
|
"eval_samples_per_second": 87.951, |
|
"eval_steps_per_second": 1.407, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023892215568862272, |
|
"loss": 1.7289, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.725810170173645, |
|
"eval_runtime": 22.7407, |
|
"eval_samples_per_second": 87.948, |
|
"eval_steps_per_second": 1.407, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 1.7222310304641724, |
|
"eval_runtime": 22.733, |
|
"eval_samples_per_second": 87.978, |
|
"eval_steps_per_second": 1.408, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00022095808383233533, |
|
"loss": 1.7172, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.7189042568206787, |
|
"eval_runtime": 22.7271, |
|
"eval_samples_per_second": 88.001, |
|
"eval_steps_per_second": 1.408, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00020299401197604788, |
|
"loss": 1.7127, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.7177015542984009, |
|
"eval_runtime": 22.7303, |
|
"eval_samples_per_second": 87.988, |
|
"eval_steps_per_second": 1.408, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 1.7150254249572754, |
|
"eval_runtime": 22.7281, |
|
"eval_samples_per_second": 87.997, |
|
"eval_steps_per_second": 1.408, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018502994011976046, |
|
"loss": 1.7017, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.7136248350143433, |
|
"eval_runtime": 22.732, |
|
"eval_samples_per_second": 87.982, |
|
"eval_steps_per_second": 1.408, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.7119090557098389, |
|
"eval_runtime": 22.7069, |
|
"eval_samples_per_second": 88.079, |
|
"eval_steps_per_second": 1.409, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016706586826347302, |
|
"loss": 1.7003, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 1.7102028131484985, |
|
"eval_runtime": 22.9016, |
|
"eval_samples_per_second": 87.33, |
|
"eval_steps_per_second": 1.397, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 1.708575963973999, |
|
"eval_runtime": 22.7175, |
|
"eval_samples_per_second": 88.038, |
|
"eval_steps_per_second": 1.409, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001491017964071856, |
|
"loss": 1.7025, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 1.7076679468154907, |
|
"eval_runtime": 22.792, |
|
"eval_samples_per_second": 87.75, |
|
"eval_steps_per_second": 1.404, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 1.7062067985534668, |
|
"eval_runtime": 22.7054, |
|
"eval_samples_per_second": 88.085, |
|
"eval_steps_per_second": 1.409, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001311377245508982, |
|
"loss": 1.6976, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 1.7049460411071777, |
|
"eval_runtime": 22.765, |
|
"eval_samples_per_second": 87.854, |
|
"eval_steps_per_second": 1.406, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00011317365269461076, |
|
"loss": 1.6955, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 1.704172134399414, |
|
"eval_runtime": 22.8464, |
|
"eval_samples_per_second": 87.541, |
|
"eval_steps_per_second": 1.401, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.7033213376998901, |
|
"eval_runtime": 22.7378, |
|
"eval_samples_per_second": 87.959, |
|
"eval_steps_per_second": 1.407, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.520958083832335e-05, |
|
"loss": 1.6893, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 1.7023844718933105, |
|
"eval_runtime": 22.7421, |
|
"eval_samples_per_second": 87.942, |
|
"eval_steps_per_second": 1.407, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.7013720273971558, |
|
"eval_runtime": 22.7212, |
|
"eval_samples_per_second": 88.023, |
|
"eval_steps_per_second": 1.408, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.724550898203592e-05, |
|
"loss": 1.6853, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 1.7013109922409058, |
|
"eval_runtime": 22.7306, |
|
"eval_samples_per_second": 87.987, |
|
"eval_steps_per_second": 1.408, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 1.7006632089614868, |
|
"eval_runtime": 22.748, |
|
"eval_samples_per_second": 87.92, |
|
"eval_steps_per_second": 1.407, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.92814371257485e-05, |
|
"loss": 1.6877, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.7002646923065186, |
|
"eval_runtime": 22.7267, |
|
"eval_samples_per_second": 88.002, |
|
"eval_steps_per_second": 1.408, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 1.6997302770614624, |
|
"eval_runtime": 22.7256, |
|
"eval_samples_per_second": 88.006, |
|
"eval_steps_per_second": 1.408, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.131736526946108e-05, |
|
"loss": 1.6834, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.6993123292922974, |
|
"eval_runtime": 22.7463, |
|
"eval_samples_per_second": 87.926, |
|
"eval_steps_per_second": 1.407, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 1.6989519596099854, |
|
"eval_runtime": 22.7888, |
|
"eval_samples_per_second": 87.762, |
|
"eval_steps_per_second": 1.404, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3353293413173652e-05, |
|
"loss": 1.6854, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 1.698702335357666, |
|
"eval_runtime": 22.724, |
|
"eval_samples_per_second": 88.013, |
|
"eval_steps_per_second": 1.408, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.389221556886227e-06, |
|
"loss": 1.6858, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 1.6984472274780273, |
|
"eval_runtime": 22.7255, |
|
"eval_samples_per_second": 88.007, |
|
"eval_steps_per_second": 1.408, |
|
"step": 341 |
|
} |
|
], |
|
"max_steps": 345, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.604629423378399e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|