|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.87154784842646, |
|
"eval_steps": 3110, |
|
"global_step": 31100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.987154784842646, |
|
"grad_norm": 3.000732898712158, |
|
"learning_rate": 9.00096463022508e-06, |
|
"loss": 2.3923, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 9.987154784842646, |
|
"eval_accuracy": 0.0765299914831488, |
|
"eval_loss": 3.097198724746704, |
|
"eval_runtime": 31.7938, |
|
"eval_samples_per_second": 258.51, |
|
"eval_steps_per_second": 12.927, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 19.974309569685293, |
|
"grad_norm": 7.0140557289123535, |
|
"learning_rate": 8.00128617363344e-06, |
|
"loss": 2.1272, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 19.974309569685293, |
|
"eval_accuracy": 0.07817252707141988, |
|
"eval_loss": 3.585836410522461, |
|
"eval_runtime": 31.7391, |
|
"eval_samples_per_second": 258.955, |
|
"eval_steps_per_second": 12.949, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 29.961464354527937, |
|
"grad_norm": 4.256643295288086, |
|
"learning_rate": 7.001607717041802e-06, |
|
"loss": 1.9949, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 29.961464354527937, |
|
"eval_accuracy": 0.07851725676278541, |
|
"eval_loss": 3.6032633781433105, |
|
"eval_runtime": 31.708, |
|
"eval_samples_per_second": 259.209, |
|
"eval_steps_per_second": 12.962, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 39.948619139370585, |
|
"grad_norm": 3.2846930027008057, |
|
"learning_rate": 6.0019292604501615e-06, |
|
"loss": 1.957, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 39.948619139370585, |
|
"eval_accuracy": 0.07692541671736221, |
|
"eval_loss": 3.7207980155944824, |
|
"eval_runtime": 31.5878, |
|
"eval_samples_per_second": 260.195, |
|
"eval_steps_per_second": 13.011, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 49.93577392421323, |
|
"grad_norm": 1.5283381938934326, |
|
"learning_rate": 5.002572347266882e-06, |
|
"loss": 1.9313, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 49.93577392421323, |
|
"eval_accuracy": 0.07587297724784037, |
|
"eval_loss": 3.817392110824585, |
|
"eval_runtime": 31.8048, |
|
"eval_samples_per_second": 258.421, |
|
"eval_steps_per_second": 12.923, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 59.922928709055874, |
|
"grad_norm": 1.3086577653884888, |
|
"learning_rate": 4.002893890675241e-06, |
|
"loss": 1.9255, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 59.922928709055874, |
|
"eval_accuracy": 0.07484689946059943, |
|
"eval_loss": 3.914490222930908, |
|
"eval_runtime": 31.7322, |
|
"eval_samples_per_second": 259.011, |
|
"eval_steps_per_second": 12.952, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 69.91008349389853, |
|
"grad_norm": 0.9638305902481079, |
|
"learning_rate": 3.0035369774919613e-06, |
|
"loss": 1.9179, |
|
"step": 21770 |
|
}, |
|
{ |
|
"epoch": 69.91008349389853, |
|
"eval_accuracy": 0.07458328263779049, |
|
"eval_loss": 4.036659240722656, |
|
"eval_runtime": 31.8221, |
|
"eval_samples_per_second": 258.279, |
|
"eval_steps_per_second": 12.916, |
|
"step": 21770 |
|
}, |
|
{ |
|
"epoch": 79.89723827874117, |
|
"grad_norm": 1.957848072052002, |
|
"learning_rate": 2.0041800643086816e-06, |
|
"loss": 1.9133, |
|
"step": 24880 |
|
}, |
|
{ |
|
"epoch": 79.89723827874117, |
|
"eval_accuracy": 0.0740205621121791, |
|
"eval_loss": 4.068979740142822, |
|
"eval_runtime": 31.7024, |
|
"eval_samples_per_second": 259.255, |
|
"eval_steps_per_second": 12.964, |
|
"step": 24880 |
|
}, |
|
{ |
|
"epoch": 89.88439306358381, |
|
"grad_norm": 0.8189646005630493, |
|
"learning_rate": 1.0045016077170419e-06, |
|
"loss": 1.9102, |
|
"step": 27990 |
|
}, |
|
{ |
|
"epoch": 89.88439306358381, |
|
"eval_accuracy": 0.07374511632937232, |
|
"eval_loss": 4.122669219970703, |
|
"eval_runtime": 31.652, |
|
"eval_samples_per_second": 259.667, |
|
"eval_steps_per_second": 12.985, |
|
"step": 27990 |
|
}, |
|
{ |
|
"epoch": 99.87154784842646, |
|
"grad_norm": 0.5666019320487976, |
|
"learning_rate": 5.144694533762058e-09, |
|
"loss": 1.9084, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 99.87154784842646, |
|
"eval_accuracy": 0.07336658960944153, |
|
"eval_loss": 4.205153942108154, |
|
"eval_runtime": 31.8056, |
|
"eval_samples_per_second": 258.414, |
|
"eval_steps_per_second": 12.922, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 99.87154784842646, |
|
"step": 31100, |
|
"total_flos": 2.458812304495296e+18, |
|
"train_loss": 1.9977928117463826, |
|
"train_runtime": 55190.6147, |
|
"train_samples_per_second": 112.8, |
|
"train_steps_per_second": 0.564 |
|
} |
|
], |
|
"logging_steps": 3110, |
|
"max_steps": 31100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.458812304495296e+18, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|