|
{ |
|
"best_metric": 0.6703549228453575, |
|
"best_model_checkpoint": "output/pretraining/vihealthbert-w_unsup-SynPD/lr3e-5_wr0.1_wd0.0/checkpoint-30000", |
|
"epoch": 5.1697397897639155, |
|
"eval_steps": 5000, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00017232465965879716, |
|
"grad_norm": 41.827266693115234, |
|
"learning_rate": 5.169739789763915e-09, |
|
"loss": 13.7242, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.8616232982939859, |
|
"grad_norm": 5.6151957511901855, |
|
"learning_rate": 2.584869894881958e-05, |
|
"loss": 7.0234, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8616232982939859, |
|
"eval_accuracy": 0.5576033171446898, |
|
"eval_loss": 2.590928077697754, |
|
"eval_runtime": 37.589, |
|
"eval_samples_per_second": 260.023, |
|
"eval_steps_per_second": 16.255, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7232465965879717, |
|
"grad_norm": 4.410743713378906, |
|
"learning_rate": 2.758917801137343e-05, |
|
"loss": 5.2736, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7232465965879717, |
|
"eval_accuracy": 0.596153624823046, |
|
"eval_loss": 2.188957452774048, |
|
"eval_runtime": 37.6297, |
|
"eval_samples_per_second": 259.741, |
|
"eval_steps_per_second": 16.237, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.5848698948819577, |
|
"grad_norm": 4.754213809967041, |
|
"learning_rate": 2.4717100350393475e-05, |
|
"loss": 4.9126, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.5848698948819577, |
|
"eval_accuracy": 0.6381037526075694, |
|
"eval_loss": 1.909491777420044, |
|
"eval_runtime": 36.8556, |
|
"eval_samples_per_second": 265.197, |
|
"eval_steps_per_second": 16.578, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.4464931931759435, |
|
"grad_norm": 4.2047200202941895, |
|
"learning_rate": 2.184502268941352e-05, |
|
"loss": 4.791, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.4464931931759435, |
|
"eval_accuracy": 0.6468589272593681, |
|
"eval_loss": 1.8286069631576538, |
|
"eval_runtime": 35.9342, |
|
"eval_samples_per_second": 271.997, |
|
"eval_steps_per_second": 17.003, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.30811649146993, |
|
"grad_norm": 4.629519462585449, |
|
"learning_rate": 1.897294502843357e-05, |
|
"loss": 4.6538, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.30811649146993, |
|
"eval_accuracy": 0.6644416567883901, |
|
"eval_loss": 1.714424967765808, |
|
"eval_runtime": 35.6122, |
|
"eval_samples_per_second": 274.456, |
|
"eval_steps_per_second": 17.157, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.1697397897639155, |
|
"grad_norm": 3.993448257446289, |
|
"learning_rate": 1.6100867367453616e-05, |
|
"loss": 4.5846, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.1697397897639155, |
|
"eval_accuracy": 0.6703549228453575, |
|
"eval_loss": 1.6779303550720215, |
|
"eval_runtime": 36.1572, |
|
"eval_samples_per_second": 270.32, |
|
"eval_steps_per_second": 16.898, |
|
"step": 30000 |
|
} |
|
], |
|
"logging_steps": 5000, |
|
"max_steps": 58030, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 5000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|