{ "best_metric": 0.16516298, "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v3-20241215-182208/checkpoint-93", "epoch": 3.0, "eval_steps": 200, "global_step": 93, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.84791154, "epoch": 0.03225806451612903, "grad_norm": 27.065223316478477, "learning_rate": 0.0, "loss": 1.10534477, "memory(GiB)": 55.66, "step": 1, "train_speed(iter/s)": 0.022247 }, { "acc": 0.83733273, "epoch": 0.16129032258064516, "grad_norm": 17.03496531553331, "learning_rate": 1e-05, "loss": 1.06814337, "memory(GiB)": 76.03, "step": 5, "train_speed(iter/s)": 0.02886 }, { "acc": 0.91679258, "epoch": 0.3225806451612903, "grad_norm": 7.623726487239116, "learning_rate": 9.920563966078412e-06, "loss": 0.49230399, "memory(GiB)": 73.79, "step": 10, "train_speed(iter/s)": 0.030297 }, { "acc": 0.92166452, "epoch": 0.4838709677419355, "grad_norm": 3.6617099092671213, "learning_rate": 9.68478015013631e-06, "loss": 0.49300199, "memory(GiB)": 73.79, "step": 15, "train_speed(iter/s)": 0.030869 }, { "acc": 0.93338518, "epoch": 0.6451612903225806, "grad_norm": 3.119220979880785, "learning_rate": 9.3001411939184e-06, "loss": 0.41259723, "memory(GiB)": 73.79, "step": 20, "train_speed(iter/s)": 0.031158 }, { "acc": 0.92974329, "epoch": 0.8064516129032258, "grad_norm": 4.078533788825603, "learning_rate": 8.778869996984113e-06, "loss": 0.38431735, "memory(GiB)": 73.79, "step": 25, "train_speed(iter/s)": 0.031339 }, { "acc": 0.91595602, "epoch": 0.967741935483871, "grad_norm": 10.998030685015813, "learning_rate": 8.13753130240057e-06, "loss": 0.40506763, "memory(GiB)": 73.79, "step": 30, "train_speed(iter/s)": 0.031457 }, { "acc": 0.9471405, "epoch": 1.129032258064516, "grad_norm": 2.6744991236459517, "learning_rate": 7.396505309106925e-06, "loss": 0.27681351, "memory(GiB)": 73.79, "step": 35, "train_speed(iter/s)": 0.031512 }, { "acc": 0.932164, "epoch": 1.2903225806451613, "grad_norm": 2.113425035726855, "learning_rate": 6.579340038281318e-06, "loss": 0.35436311, "memory(GiB)": 73.79, "step": 40, "train_speed(iter/s)": 0.031584 }, { "acc": 0.92934284, "epoch": 1.4516129032258065, "grad_norm": 2.6570760550105366, "learning_rate": 5.712003033947289e-06, "loss": 0.3613925, "memory(GiB)": 73.79, "step": 45, "train_speed(iter/s)": 0.031638 }, { "acc": 0.93117428, "epoch": 1.6129032258064515, "grad_norm": 0.9540185420283845, "learning_rate": 4.822056176972018e-06, "loss": 0.37923932, "memory(GiB)": 73.79, "step": 50, "train_speed(iter/s)": 0.031682 }, { "acc": 0.93338251, "epoch": 1.7741935483870968, "grad_norm": 1.4962776432309268, "learning_rate": 3.937779834879894e-06, "loss": 0.37198489, "memory(GiB)": 73.79, "step": 55, "train_speed(iter/s)": 0.031717 }, { "acc": 0.93398943, "epoch": 1.935483870967742, "grad_norm": 1.8156212498163269, "learning_rate": 3.0872741798907336e-06, "loss": 0.36215272, "memory(GiB)": 73.79, "step": 60, "train_speed(iter/s)": 0.031744 }, { "acc": 0.95252762, "epoch": 2.096774193548387, "grad_norm": 1.6592309190503378, "learning_rate": 2.2975662331307417e-06, "loss": 0.2656436, "memory(GiB)": 73.79, "step": 65, "train_speed(iter/s)": 0.031749 }, { "acc": 0.93537579, "epoch": 2.258064516129032, "grad_norm": 1.698659282567525, "learning_rate": 1.5937510110003426e-06, "loss": 0.31273713, "memory(GiB)": 73.79, "step": 70, "train_speed(iter/s)": 0.031772 }, { "acc": 0.94613972, "epoch": 2.4193548387096775, "grad_norm": 1.5348577615781052, "learning_rate": 9.981940659986607e-07, "loss": 0.28420959, "memory(GiB)": 73.79, "step": 75, "train_speed(iter/s)": 0.031793 }, { "acc": 0.93988018, "epoch": 2.5806451612903225, "grad_norm": 1.7810683323628338, "learning_rate": 5.298207633363734e-07, "loss": 0.30640914, "memory(GiB)": 73.79, "step": 80, "train_speed(iter/s)": 0.031807 }, { "acc": 0.93680553, "epoch": 2.741935483870968, "grad_norm": 2.5905151386873873, "learning_rate": 2.035148784143204e-07, "loss": 0.33139644, "memory(GiB)": 73.79, "step": 85, "train_speed(iter/s)": 0.031819 }, { "acc": 0.94427986, "epoch": 2.903225806451613, "grad_norm": 1.829616630873348, "learning_rate": 2.9645626291399768e-08, "loss": 0.32220521, "memory(GiB)": 73.79, "step": 90, "train_speed(iter/s)": 0.03183 }, { "epoch": 3.0, "eval_acc": 0.9263189812007278, "eval_loss": 0.16516298055648804, "eval_runtime": 76.0876, "eval_samples_per_second": 1.603, "eval_steps_per_second": 0.21, "step": 93 } ], "logging_steps": 5, "max_steps": 93, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 216369601708032.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }