|
{ |
|
"best_metric": 0.5313496280552603, |
|
"best_model_checkpoint": "timesformer-base-finetuned-k400-finetuned-ElderReact-anger-balanced/checkpoint-75", |
|
"epoch": 9.0625, |
|
"eval_steps": 500, |
|
"global_step": 240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.033487319946289, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.7267, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.82132339477539, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6923, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.46014877789585545, |
|
"eval_loss": 0.8338257670402527, |
|
"eval_runtime": 193.5219, |
|
"eval_samples_per_second": 4.862, |
|
"eval_steps_per_second": 0.305, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 6.4887261390686035, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.5405, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 4.2266340255737305, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.5519, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 38.938472747802734, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.6436, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.5058448459086079, |
|
"eval_loss": 0.9187208414077759, |
|
"eval_runtime": 186.1328, |
|
"eval_samples_per_second": 5.056, |
|
"eval_steps_per_second": 0.317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 5.287866115570068, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4336, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 8.11617374420166, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.5464, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.5313496280552603, |
|
"eval_loss": 0.8164978623390198, |
|
"eval_runtime": 165.1643, |
|
"eval_samples_per_second": 5.697, |
|
"eval_steps_per_second": 0.357, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 9.028487205505371, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.5459, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 4.760679721832275, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.4503, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 13.96630573272705, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.4147, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.5228480340063762, |
|
"eval_loss": 0.9949731230735779, |
|
"eval_runtime": 166.6223, |
|
"eval_samples_per_second": 5.648, |
|
"eval_steps_per_second": 0.354, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 3.802431344985962, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.3409, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 9.562992095947266, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.4483, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.5079702444208289, |
|
"eval_loss": 1.064165711402893, |
|
"eval_runtime": 194.4496, |
|
"eval_samples_per_second": 4.839, |
|
"eval_steps_per_second": 0.303, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 8.446785926818848, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.6922, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 3.8589024543762207, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.3718, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 5.9254536628723145, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.3565, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.5143464399574921, |
|
"eval_loss": 0.8610547780990601, |
|
"eval_runtime": 184.6388, |
|
"eval_samples_per_second": 5.096, |
|
"eval_steps_per_second": 0.32, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 5.898458003997803, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.3232, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 3.0727579593658447, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.3114, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.5185972369819342, |
|
"eval_loss": 0.9594563245773315, |
|
"eval_runtime": 185.3195, |
|
"eval_samples_per_second": 5.078, |
|
"eval_steps_per_second": 0.318, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 1.4301713705062866, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.253, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 6.7790045738220215, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.3144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 26.213571548461914, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.3074, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.5196599362380446, |
|
"eval_loss": 1.0241553783416748, |
|
"eval_runtime": 191.958, |
|
"eval_samples_per_second": 4.902, |
|
"eval_steps_per_second": 0.307, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 3.681013822555542, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.2552, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 6.130499839782715, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.2888, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.5069075451647184, |
|
"eval_loss": 1.1212139129638672, |
|
"eval_runtime": 182.5202, |
|
"eval_samples_per_second": 5.156, |
|
"eval_steps_per_second": 0.323, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 4.048097133636475, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.2812, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 2.3711907863616943, |
|
"learning_rate": 0.0, |
|
"loss": 0.2609, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_accuracy": 0.5111583421891605, |
|
"eval_loss": 1.111896276473999, |
|
"eval_runtime": 168.1115, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 0.351, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"step": 240, |
|
"total_flos": 3.246125779066061e+18, |
|
"train_loss": 0.43130043546358743, |
|
"train_runtime": 2651.285, |
|
"train_samples_per_second": 1.448, |
|
"train_steps_per_second": 0.091 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_accuracy": 0.5011600928074246, |
|
"eval_loss": 0.8047753572463989, |
|
"eval_runtime": 160.408, |
|
"eval_samples_per_second": 5.374, |
|
"eval_steps_per_second": 0.337, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_accuracy": 0.5011600928074246, |
|
"eval_loss": 0.8047754168510437, |
|
"eval_runtime": 160.5206, |
|
"eval_samples_per_second": 5.37, |
|
"eval_steps_per_second": 0.336, |
|
"step": 240 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 3.246125779066061e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|