|
{ |
|
"best_metric": 0.6879923855830649, |
|
"best_model_checkpoint": "output/pretraining/vihealthbert-w_unsup-SynPD/lr3e-5_wr0.1_wd0.0/checkpoint-55000", |
|
"epoch": 10.0, |
|
"eval_steps": 5000, |
|
"global_step": 58030, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00017232465965879716, |
|
"grad_norm": 41.827266693115234, |
|
"learning_rate": 5.169739789763915e-09, |
|
"loss": 13.7242, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.8616232982939859, |
|
"grad_norm": 5.6151957511901855, |
|
"learning_rate": 2.584869894881958e-05, |
|
"loss": 7.0234, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8616232982939859, |
|
"eval_accuracy": 0.5576033171446898, |
|
"eval_loss": 2.590928077697754, |
|
"eval_runtime": 37.589, |
|
"eval_samples_per_second": 260.023, |
|
"eval_steps_per_second": 16.255, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.7232465965879717, |
|
"grad_norm": 4.410743713378906, |
|
"learning_rate": 2.758917801137343e-05, |
|
"loss": 5.2736, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7232465965879717, |
|
"eval_accuracy": 0.596153624823046, |
|
"eval_loss": 2.188957452774048, |
|
"eval_runtime": 37.6297, |
|
"eval_samples_per_second": 259.741, |
|
"eval_steps_per_second": 16.237, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.5848698948819577, |
|
"grad_norm": 4.754213809967041, |
|
"learning_rate": 2.4717100350393475e-05, |
|
"loss": 4.9126, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.5848698948819577, |
|
"eval_accuracy": 0.6381037526075694, |
|
"eval_loss": 1.909491777420044, |
|
"eval_runtime": 36.8556, |
|
"eval_samples_per_second": 265.197, |
|
"eval_steps_per_second": 16.578, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.4464931931759435, |
|
"grad_norm": 4.2047200202941895, |
|
"learning_rate": 2.184502268941352e-05, |
|
"loss": 4.791, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.4464931931759435, |
|
"eval_accuracy": 0.6468589272593681, |
|
"eval_loss": 1.8286069631576538, |
|
"eval_runtime": 35.9342, |
|
"eval_samples_per_second": 271.997, |
|
"eval_steps_per_second": 17.003, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.30811649146993, |
|
"grad_norm": 4.629519462585449, |
|
"learning_rate": 1.897294502843357e-05, |
|
"loss": 4.6538, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.30811649146993, |
|
"eval_accuracy": 0.6644416567883901, |
|
"eval_loss": 1.714424967765808, |
|
"eval_runtime": 35.6122, |
|
"eval_samples_per_second": 274.456, |
|
"eval_steps_per_second": 17.157, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.1697397897639155, |
|
"grad_norm": 3.993448257446289, |
|
"learning_rate": 1.6100867367453616e-05, |
|
"loss": 4.5846, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.1697397897639155, |
|
"eval_accuracy": 0.6703549228453575, |
|
"eval_loss": 1.6779303550720215, |
|
"eval_runtime": 36.1572, |
|
"eval_samples_per_second": 270.32, |
|
"eval_steps_per_second": 16.898, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.031363088057901, |
|
"grad_norm": 4.6959357261657715, |
|
"learning_rate": 1.3228789706473663e-05, |
|
"loss": 4.5568, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.031363088057901, |
|
"eval_accuracy": 0.6765665494261385, |
|
"eval_loss": 1.6362268924713135, |
|
"eval_runtime": 36.2906, |
|
"eval_samples_per_second": 269.326, |
|
"eval_steps_per_second": 16.836, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.892986386351887, |
|
"grad_norm": 4.4082441329956055, |
|
"learning_rate": 1.035671204549371e-05, |
|
"loss": 4.5079, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.892986386351887, |
|
"eval_accuracy": 0.681425908452811, |
|
"eval_loss": 1.6008453369140625, |
|
"eval_runtime": 36.1284, |
|
"eval_samples_per_second": 270.535, |
|
"eval_steps_per_second": 16.912, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.754609684645873, |
|
"grad_norm": 5.832913398742676, |
|
"learning_rate": 7.484634384513757e-06, |
|
"loss": 4.469, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.754609684645873, |
|
"eval_accuracy": 0.6805218690729352, |
|
"eval_loss": 1.6063588857650757, |
|
"eval_runtime": 36.0752, |
|
"eval_samples_per_second": 270.934, |
|
"eval_steps_per_second": 16.937, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 8.61623298293986, |
|
"grad_norm": 3.991995096206665, |
|
"learning_rate": 4.612556723533804e-06, |
|
"loss": 4.4514, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 8.61623298293986, |
|
"eval_accuracy": 0.6852411542306852, |
|
"eval_loss": 1.5799689292907715, |
|
"eval_runtime": 37.5803, |
|
"eval_samples_per_second": 260.083, |
|
"eval_steps_per_second": 16.259, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 9.477856281233844, |
|
"grad_norm": 4.424502849578857, |
|
"learning_rate": 1.7404790625538515e-06, |
|
"loss": 4.4317, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 9.477856281233844, |
|
"eval_accuracy": 0.6879923855830649, |
|
"eval_loss": 1.5539859533309937, |
|
"eval_runtime": 35.7439, |
|
"eval_samples_per_second": 273.445, |
|
"eval_steps_per_second": 17.094, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 58030, |
|
"total_flos": 0.0, |
|
"train_loss": 4.85471958516693, |
|
"train_runtime": 21406.4835, |
|
"train_samples_per_second": 86.742, |
|
"train_steps_per_second": 2.711 |
|
} |
|
], |
|
"logging_steps": 5000, |
|
"max_steps": 58030, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 5000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|