|
{ |
|
"best_metric": 0.017106017097830772, |
|
"best_model_checkpoint": "saves/chess/tactic/checkpoint-1000", |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 3075, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 4.799216229524151, |
|
"learning_rate": 1.6233766233766235e-06, |
|
"loss": 0.8994, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 0.7153367237536395, |
|
"learning_rate": 3.246753246753247e-06, |
|
"loss": 0.0317, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.7387556653345047, |
|
"learning_rate": 4.870129870129871e-06, |
|
"loss": 0.0245, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 1.9740058439148331, |
|
"learning_rate": 4.986373880811079e-06, |
|
"loss": 0.0222, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 0.6344698055959135, |
|
"learning_rate": 4.940833840455932e-06, |
|
"loss": 0.0208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.6970636016265686, |
|
"learning_rate": 4.863863172170709e-06, |
|
"loss": 0.02, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.1382113821138211, |
|
"grad_norm": 0.6801856070533878, |
|
"learning_rate": 4.756453027584134e-06, |
|
"loss": 0.0171, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.3008130081300813, |
|
"grad_norm": 0.275933332771696, |
|
"learning_rate": 4.619986527593033e-06, |
|
"loss": 0.0173, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 0.5872805765471233, |
|
"learning_rate": 4.4562209519085615e-06, |
|
"loss": 0.0174, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 0.3927107277822733, |
|
"learning_rate": 4.26726511055776e-06, |
|
"loss": 0.0169, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.017106017097830772, |
|
"eval_runtime": 190.9398, |
|
"eval_samples_per_second": 183.105, |
|
"eval_steps_per_second": 0.718, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.7886178861788617, |
|
"grad_norm": 0.33192992931188736, |
|
"learning_rate": 4.055552188727706e-06, |
|
"loss": 0.0159, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 0.4102076449010124, |
|
"learning_rate": 3.823808414629323e-06, |
|
"loss": 0.016, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.113821138211382, |
|
"grad_norm": 0.4366312248115214, |
|
"learning_rate": 3.575017953844908e-06, |
|
"loss": 0.0126, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.2764227642276422, |
|
"grad_norm": 0.38777672675426217, |
|
"learning_rate": 3.3123844822150126e-06, |
|
"loss": 0.0119, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 0.573942741447281, |
|
"learning_rate": 3.0392899320907716e-06, |
|
"loss": 0.0118, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.6016260162601625, |
|
"grad_norm": 0.4798738867766201, |
|
"learning_rate": 2.759250943176377e-06, |
|
"loss": 0.0117, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.7642276422764227, |
|
"grad_norm": 0.2747941215742872, |
|
"learning_rate": 2.4758735787443878e-06, |
|
"loss": 0.0116, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 0.39700972538910706, |
|
"learning_rate": 2.192806890343352e-06, |
|
"loss": 0.0111, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.089430894308943, |
|
"grad_norm": 0.45186864274833893, |
|
"learning_rate": 1.9136959289452223e-06, |
|
"loss": 0.0077, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"grad_norm": 0.6795786745533399, |
|
"learning_rate": 1.6421348076082123e-06, |
|
"loss": 0.0049, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"eval_loss": 0.023808766156435013, |
|
"eval_runtime": 191.1442, |
|
"eval_samples_per_second": 182.909, |
|
"eval_steps_per_second": 0.717, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.4146341463414633, |
|
"grad_norm": 0.6090255517872498, |
|
"learning_rate": 1.3816204200673827e-06, |
|
"loss": 0.0052, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.5772357723577235, |
|
"grad_norm": 0.7553892788821762, |
|
"learning_rate": 1.1355074112188802e-06, |
|
"loss": 0.0048, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.7398373983739837, |
|
"grad_norm": 0.33557205962825315, |
|
"learning_rate": 9.069649793430869e-07, |
|
"loss": 0.0046, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.902439024390244, |
|
"grad_norm": 0.39121616514997803, |
|
"learning_rate": 6.989360663246406e-07, |
|
"loss": 0.0043, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"grad_norm": 0.37417208848590455, |
|
"learning_rate": 5.1409946137705e-07, |
|
"loss": 0.003, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.227642276422764, |
|
"grad_norm": 0.3450534162422346, |
|
"learning_rate": 3.548353062623949e-07, |
|
"loss": 0.0007, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.390243902439025, |
|
"grad_norm": 0.055395596176947635, |
|
"learning_rate": 2.231944461955507e-07, |
|
"loss": 0.0007, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.5528455284552845, |
|
"grad_norm": 0.7534781076862362, |
|
"learning_rate": 1.2087202110147994e-07, |
|
"loss": 0.0007, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.715447154471545, |
|
"grad_norm": 0.6312940968696128, |
|
"learning_rate": 4.9185637291078724e-08, |
|
"loss": 0.0006, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"grad_norm": 0.6118892403786919, |
|
"learning_rate": 9.058400639009313e-09, |
|
"loss": 0.0005, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"eval_loss": 0.03251836076378822, |
|
"eval_runtime": 191.2785, |
|
"eval_samples_per_second": 182.781, |
|
"eval_steps_per_second": 0.716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3075, |
|
"total_flos": 502860412354560.0, |
|
"train_loss": 0.03991650681670119, |
|
"train_runtime": 33327.5964, |
|
"train_samples_per_second": 47.207, |
|
"train_steps_per_second": 0.092 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3075, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 502860412354560.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|