{
  "best_metric": 0.16516298,
  "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v3-20241215-182208/checkpoint-93",
  "epoch": 3.0,
  "eval_steps": 200,
  "global_step": 93,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.84791154,
      "epoch": 0.03225806451612903,
      "grad_norm": 27.065223316478477,
      "learning_rate": 0.0,
      "loss": 1.10534477,
      "memory(GiB)": 55.66,
      "step": 1,
      "train_speed(iter/s)": 0.022247
    },
    {
      "acc": 0.83733273,
      "epoch": 0.16129032258064516,
      "grad_norm": 17.03496531553331,
      "learning_rate": 1e-05,
      "loss": 1.06814337,
      "memory(GiB)": 76.03,
      "step": 5,
      "train_speed(iter/s)": 0.02886
    },
    {
      "acc": 0.91679258,
      "epoch": 0.3225806451612903,
      "grad_norm": 7.623726487239116,
      "learning_rate": 9.920563966078412e-06,
      "loss": 0.49230399,
      "memory(GiB)": 73.79,
      "step": 10,
      "train_speed(iter/s)": 0.030297
    },
    {
      "acc": 0.92166452,
      "epoch": 0.4838709677419355,
      "grad_norm": 3.6617099092671213,
      "learning_rate": 9.68478015013631e-06,
      "loss": 0.49300199,
      "memory(GiB)": 73.79,
      "step": 15,
      "train_speed(iter/s)": 0.030869
    },
    {
      "acc": 0.93338518,
      "epoch": 0.6451612903225806,
      "grad_norm": 3.119220979880785,
      "learning_rate": 9.3001411939184e-06,
      "loss": 0.41259723,
      "memory(GiB)": 73.79,
      "step": 20,
      "train_speed(iter/s)": 0.031158
    },
    {
      "acc": 0.92974329,
      "epoch": 0.8064516129032258,
      "grad_norm": 4.078533788825603,
      "learning_rate": 8.778869996984113e-06,
      "loss": 0.38431735,
      "memory(GiB)": 73.79,
      "step": 25,
      "train_speed(iter/s)": 0.031339
    },
    {
      "acc": 0.91595602,
      "epoch": 0.967741935483871,
      "grad_norm": 10.998030685015813,
      "learning_rate": 8.13753130240057e-06,
      "loss": 0.40506763,
      "memory(GiB)": 73.79,
      "step": 30,
      "train_speed(iter/s)": 0.031457
    },
    {
      "acc": 0.9471405,
      "epoch": 1.129032258064516,
      "grad_norm": 2.6744991236459517,
      "learning_rate": 7.396505309106925e-06,
      "loss": 0.27681351,
      "memory(GiB)": 73.79,
      "step": 35,
      "train_speed(iter/s)": 0.031512
    },
    {
      "acc": 0.932164,
      "epoch": 1.2903225806451613,
      "grad_norm": 2.113425035726855,
      "learning_rate": 6.579340038281318e-06,
      "loss": 0.35436311,
      "memory(GiB)": 73.79,
      "step": 40,
      "train_speed(iter/s)": 0.031584
    },
    {
      "acc": 0.92934284,
      "epoch": 1.4516129032258065,
      "grad_norm": 2.6570760550105366,
      "learning_rate": 5.712003033947289e-06,
      "loss": 0.3613925,
      "memory(GiB)": 73.79,
      "step": 45,
      "train_speed(iter/s)": 0.031638
    },
    {
      "acc": 0.93117428,
      "epoch": 1.6129032258064515,
      "grad_norm": 0.9540185420283845,
      "learning_rate": 4.822056176972018e-06,
      "loss": 0.37923932,
      "memory(GiB)": 73.79,
      "step": 50,
      "train_speed(iter/s)": 0.031682
    },
    {
      "acc": 0.93338251,
      "epoch": 1.7741935483870968,
      "grad_norm": 1.4962776432309268,
      "learning_rate": 3.937779834879894e-06,
      "loss": 0.37198489,
      "memory(GiB)": 73.79,
      "step": 55,
      "train_speed(iter/s)": 0.031717
    },
    {
      "acc": 0.93398943,
      "epoch": 1.935483870967742,
      "grad_norm": 1.8156212498163269,
      "learning_rate": 3.0872741798907336e-06,
      "loss": 0.36215272,
      "memory(GiB)": 73.79,
      "step": 60,
      "train_speed(iter/s)": 0.031744
    },
    {
      "acc": 0.95252762,
      "epoch": 2.096774193548387,
      "grad_norm": 1.6592309190503378,
      "learning_rate": 2.2975662331307417e-06,
      "loss": 0.2656436,
      "memory(GiB)": 73.79,
      "step": 65,
      "train_speed(iter/s)": 0.031749
    },
    {
      "acc": 0.93537579,
      "epoch": 2.258064516129032,
      "grad_norm": 1.698659282567525,
      "learning_rate": 1.5937510110003426e-06,
      "loss": 0.31273713,
      "memory(GiB)": 73.79,
      "step": 70,
      "train_speed(iter/s)": 0.031772
    },
    {
      "acc": 0.94613972,
      "epoch": 2.4193548387096775,
      "grad_norm": 1.5348577615781052,
      "learning_rate": 9.981940659986607e-07,
      "loss": 0.28420959,
      "memory(GiB)": 73.79,
      "step": 75,
      "train_speed(iter/s)": 0.031793
    },
    {
      "acc": 0.93988018,
      "epoch": 2.5806451612903225,
      "grad_norm": 1.7810683323628338,
      "learning_rate": 5.298207633363734e-07,
      "loss": 0.30640914,
      "memory(GiB)": 73.79,
      "step": 80,
      "train_speed(iter/s)": 0.031807
    },
    {
      "acc": 0.93680553,
      "epoch": 2.741935483870968,
      "grad_norm": 2.5905151386873873,
      "learning_rate": 2.035148784143204e-07,
      "loss": 0.33139644,
      "memory(GiB)": 73.79,
      "step": 85,
      "train_speed(iter/s)": 0.031819
    },
    {
      "acc": 0.94427986,
      "epoch": 2.903225806451613,
      "grad_norm": 1.829616630873348,
      "learning_rate": 2.9645626291399768e-08,
      "loss": 0.32220521,
      "memory(GiB)": 73.79,
      "step": 90,
      "train_speed(iter/s)": 0.03183
    },
    {
      "epoch": 3.0,
      "eval_acc": 0.9263189812007278,
      "eval_loss": 0.16516298055648804,
      "eval_runtime": 76.0876,
      "eval_samples_per_second": 1.603,
      "eval_steps_per_second": 0.21,
      "step": 93
    }
  ],
  "logging_steps": 5,
  "max_steps": 93,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 216369601708032.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}