{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3872216844143272,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003872216844143272,
      "eval_loss": 0.18226714432239532,
      "eval_runtime": 31.1866,
      "eval_samples_per_second": 13.948,
      "eval_steps_per_second": 1.764,
      "step": 1
    },
    {
      "epoch": 0.011616650532429816,
      "grad_norm": 4.684105396270752,
      "learning_rate": 3e-05,
      "loss": 0.7421,
      "step": 3
    },
    {
      "epoch": 0.023233301064859633,
      "grad_norm": 3.108184576034546,
      "learning_rate": 6e-05,
      "loss": 0.4496,
      "step": 6
    },
    {
      "epoch": 0.03484995159728945,
      "grad_norm": 1.3367609977722168,
      "learning_rate": 9e-05,
      "loss": 0.154,
      "step": 9
    },
    {
      "epoch": 0.03484995159728945,
      "eval_loss": 0.005886069033294916,
      "eval_runtime": 31.6952,
      "eval_samples_per_second": 13.724,
      "eval_steps_per_second": 1.735,
      "step": 9
    },
    {
      "epoch": 0.046466602129719266,
      "grad_norm": 0.09060771763324738,
      "learning_rate": 9.987820251299122e-05,
      "loss": 0.0114,
      "step": 12
    },
    {
      "epoch": 0.05808325266214908,
      "grad_norm": 0.8179565072059631,
      "learning_rate": 9.924038765061042e-05,
      "loss": 0.0038,
      "step": 15
    },
    {
      "epoch": 0.0696999031945789,
      "grad_norm": 1.4341135025024414,
      "learning_rate": 9.806308479691595e-05,
      "loss": 0.0018,
      "step": 18
    },
    {
      "epoch": 0.0696999031945789,
      "eval_loss": 0.00017564959125593305,
      "eval_runtime": 31.7814,
      "eval_samples_per_second": 13.687,
      "eval_steps_per_second": 1.731,
      "step": 18
    },
    {
      "epoch": 0.08131655372700872,
      "grad_norm": 0.004023493733257055,
      "learning_rate": 9.635919272833938e-05,
      "loss": 0.0001,
      "step": 21
    },
    {
      "epoch": 0.09293320425943853,
      "grad_norm": 0.23858696222305298,
      "learning_rate": 9.414737964294636e-05,
      "loss": 0.0015,
      "step": 24
    },
    {
      "epoch": 0.10454985479186835,
      "grad_norm": 0.026785193011164665,
      "learning_rate": 9.145187862775209e-05,
      "loss": 0.0033,
      "step": 27
    },
    {
      "epoch": 0.10454985479186835,
      "eval_loss": 0.0001601200783625245,
      "eval_runtime": 31.8153,
      "eval_samples_per_second": 13.673,
      "eval_steps_per_second": 1.729,
      "step": 27
    },
    {
      "epoch": 0.11616650532429816,
      "grad_norm": 0.3270968198776245,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.0015,
      "step": 30
    },
    {
      "epoch": 0.12778315585672798,
      "grad_norm": 0.01174607127904892,
      "learning_rate": 8.473291852294987e-05,
      "loss": 0.0003,
      "step": 33
    },
    {
      "epoch": 0.1393998063891578,
      "grad_norm": 0.9263725280761719,
      "learning_rate": 8.07830737662829e-05,
      "loss": 0.0008,
      "step": 36
    },
    {
      "epoch": 0.1393998063891578,
      "eval_loss": 0.0002172403474105522,
      "eval_runtime": 31.8495,
      "eval_samples_per_second": 13.658,
      "eval_steps_per_second": 1.727,
      "step": 36
    },
    {
      "epoch": 0.1510164569215876,
      "grad_norm": 0.01262608077377081,
      "learning_rate": 7.649596321166024e-05,
      "loss": 0.0014,
      "step": 39
    },
    {
      "epoch": 0.16263310745401743,
      "grad_norm": 0.01573655568063259,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.0002,
      "step": 42
    },
    {
      "epoch": 0.17424975798644723,
      "grad_norm": 0.061099544167518616,
      "learning_rate": 6.710100716628344e-05,
      "loss": 0.0002,
      "step": 45
    },
    {
      "epoch": 0.17424975798644723,
      "eval_loss": 0.00012387760216370225,
      "eval_runtime": 31.8026,
      "eval_samples_per_second": 13.678,
      "eval_steps_per_second": 1.729,
      "step": 45
    },
    {
      "epoch": 0.18586640851887706,
      "grad_norm": 0.05485091730952263,
      "learning_rate": 6.209609477998338e-05,
      "loss": 0.0015,
      "step": 48
    },
    {
      "epoch": 0.19748305905130686,
      "grad_norm": 0.002600950188934803,
      "learning_rate": 5.695865504800327e-05,
      "loss": 0.0001,
      "step": 51
    },
    {
      "epoch": 0.2090997095837367,
      "grad_norm": 0.0022585629485547543,
      "learning_rate": 5.174497483512506e-05,
      "loss": 0.0,
      "step": 54
    },
    {
      "epoch": 0.2090997095837367,
      "eval_loss": 0.00028623713296838105,
      "eval_runtime": 31.8325,
      "eval_samples_per_second": 13.665,
      "eval_steps_per_second": 1.728,
      "step": 54
    },
    {
      "epoch": 0.2207163601161665,
      "grad_norm": 0.007769421208649874,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 0.0001,
      "step": 57
    },
    {
      "epoch": 0.23233301064859632,
      "grad_norm": 0.0027711803559213877,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.0048,
      "step": 60
    },
    {
      "epoch": 0.24394966118102615,
      "grad_norm": 0.003505075117573142,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 0.0004,
      "step": 63
    },
    {
      "epoch": 0.24394966118102615,
      "eval_loss": 2.9244753022794612e-05,
      "eval_runtime": 31.8226,
      "eval_samples_per_second": 13.67,
      "eval_steps_per_second": 1.728,
      "step": 63
    },
    {
      "epoch": 0.25556631171345595,
      "grad_norm": 0.0020100402180105448,
      "learning_rate": 3.12696703292044e-05,
      "loss": 0.0004,
      "step": 66
    },
    {
      "epoch": 0.2671829622458858,
      "grad_norm": 0.004158449359238148,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 0.0001,
      "step": 69
    },
    {
      "epoch": 0.2787996127783156,
      "grad_norm": 0.08573237806558609,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 0.0001,
      "step": 72
    },
    {
      "epoch": 0.2787996127783156,
      "eval_loss": 2.5135599571513012e-05,
      "eval_runtime": 31.8171,
      "eval_samples_per_second": 13.672,
      "eval_steps_per_second": 1.729,
      "step": 72
    },
    {
      "epoch": 0.2904162633107454,
      "grad_norm": 0.006858558859676123,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 0.0001,
      "step": 75
    },
    {
      "epoch": 0.3020329138431752,
      "grad_norm": 0.004765031859278679,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 0.0001,
      "step": 78
    },
    {
      "epoch": 0.31364956437560504,
      "grad_norm": 0.011414007283747196,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 0.0001,
      "step": 81
    },
    {
      "epoch": 0.31364956437560504,
      "eval_loss": 2.312105061719194e-05,
      "eval_runtime": 31.8058,
      "eval_samples_per_second": 13.677,
      "eval_steps_per_second": 1.729,
      "step": 81
    },
    {
      "epoch": 0.32526621490803487,
      "grad_norm": 0.007097288966178894,
      "learning_rate": 7.597595192178702e-06,
      "loss": 0.0001,
      "step": 84
    },
    {
      "epoch": 0.33688286544046464,
      "grad_norm": 0.002511974424123764,
      "learning_rate": 5.060297685041659e-06,
      "loss": 0.0001,
      "step": 87
    },
    {
      "epoch": 0.34849951597289447,
      "grad_norm": 0.0035338769666850567,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.0001,
      "step": 90
    },
    {
      "epoch": 0.34849951597289447,
      "eval_loss": 2.103859333146829e-05,
      "eval_runtime": 31.8073,
      "eval_samples_per_second": 13.676,
      "eval_steps_per_second": 1.729,
      "step": 90
    },
    {
      "epoch": 0.3601161665053243,
      "grad_norm": 0.3512181043624878,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 0.0002,
      "step": 93
    },
    {
      "epoch": 0.3717328170377541,
      "grad_norm": 0.005195324309170246,
      "learning_rate": 4.865965629214819e-07,
      "loss": 0.0001,
      "step": 96
    },
    {
      "epoch": 0.38334946757018395,
      "grad_norm": 0.07296066731214523,
      "learning_rate": 3.04586490452119e-08,
      "loss": 0.0001,
      "step": 99
    },
    {
      "epoch": 0.38334946757018395,
      "eval_loss": 1.9248587705078535e-05,
      "eval_runtime": 31.8075,
      "eval_samples_per_second": 13.676,
      "eval_steps_per_second": 1.729,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4279806347431117e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}