|
{
  "best_metric": 0.8837955535182214,
  "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-MM_Classification_base_web_images/checkpoint-342",
  "epoch": 6.948905109489051,
  "eval_steps": 500,
  "global_step": 476,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.145985401459854,
      "grad_norm": 3.675107717514038,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 1.0612,
      "step": 10
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 3.70519757270813,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.8853,
      "step": 20
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 3.8755295276641846,
      "learning_rate": 3.125e-05,
      "loss": 0.7099,
      "step": 30
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 2.727118492126465,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.6121,
      "step": 40
    },
    {
      "epoch": 0.7299270072992701,
      "grad_norm": 3.260768175125122,
      "learning_rate": 4.976635514018692e-05,
      "loss": 0.5736,
      "step": 50
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 5.847088813781738,
      "learning_rate": 4.85981308411215e-05,
      "loss": 0.517,
      "step": 60
    },
    {
      "epoch": 0.9927007299270073,
      "eval_accuracy": 0.8157231262892505,
      "eval_loss": 0.4430324137210846,
      "eval_runtime": 35.8383,
      "eval_samples_per_second": 121.741,
      "eval_steps_per_second": 1.925,
      "step": 68
    },
    {
      "epoch": 1.0218978102189782,
      "grad_norm": 5.806203365325928,
      "learning_rate": 4.742990654205608e-05,
      "loss": 0.4972,
      "step": 70
    },
    {
      "epoch": 1.167883211678832,
      "grad_norm": 2.751803159713745,
      "learning_rate": 4.6261682242990654e-05,
      "loss": 0.4767,
      "step": 80
    },
    {
      "epoch": 1.313868613138686,
      "grad_norm": 2.5074498653411865,
      "learning_rate": 4.5093457943925236e-05,
      "loss": 0.4509,
      "step": 90
    },
    {
      "epoch": 1.4598540145985401,
      "grad_norm": 3.3779966831207275,
      "learning_rate": 4.392523364485982e-05,
      "loss": 0.4496,
      "step": 100
    },
    {
      "epoch": 1.6058394160583942,
      "grad_norm": 4.8917741775512695,
      "learning_rate": 4.27570093457944e-05,
      "loss": 0.4528,
      "step": 110
    },
    {
      "epoch": 1.7518248175182483,
      "grad_norm": 4.1523966789245605,
      "learning_rate": 4.1588785046728974e-05,
      "loss": 0.4535,
      "step": 120
    },
    {
      "epoch": 1.897810218978102,
      "grad_norm": 6.303163051605225,
      "learning_rate": 4.0420560747663556e-05,
      "loss": 0.4211,
      "step": 130
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8457483382993353,
      "eval_loss": 0.38001278042793274,
      "eval_runtime": 36.8858,
      "eval_samples_per_second": 118.284,
      "eval_steps_per_second": 1.871,
      "step": 137
    },
    {
      "epoch": 2.0437956204379564,
      "grad_norm": 6.562156677246094,
      "learning_rate": 3.925233644859813e-05,
      "loss": 0.4435,
      "step": 140
    },
    {
      "epoch": 2.18978102189781,
      "grad_norm": 3.601300001144409,
      "learning_rate": 3.808411214953271e-05,
      "loss": 0.3971,
      "step": 150
    },
    {
      "epoch": 2.335766423357664,
      "grad_norm": 4.384274005889893,
      "learning_rate": 3.691588785046729e-05,
      "loss": 0.4075,
      "step": 160
    },
    {
      "epoch": 2.4817518248175183,
      "grad_norm": 3.9660568237304688,
      "learning_rate": 3.574766355140187e-05,
      "loss": 0.3788,
      "step": 170
    },
    {
      "epoch": 2.627737226277372,
      "grad_norm": 4.195096492767334,
      "learning_rate": 3.457943925233645e-05,
      "loss": 0.376,
      "step": 180
    },
    {
      "epoch": 2.7737226277372264,
      "grad_norm": 3.6591451168060303,
      "learning_rate": 3.341121495327103e-05,
      "loss": 0.3514,
      "step": 190
    },
    {
      "epoch": 2.9197080291970803,
      "grad_norm": 3.5800323486328125,
      "learning_rate": 3.224299065420561e-05,
      "loss": 0.3532,
      "step": 200
    },
    {
      "epoch": 2.9927007299270074,
      "eval_accuracy": 0.8615631446252578,
      "eval_loss": 0.3563055098056793,
      "eval_runtime": 36.2136,
      "eval_samples_per_second": 120.48,
      "eval_steps_per_second": 1.905,
      "step": 205
    },
    {
      "epoch": 3.065693430656934,
      "grad_norm": 6.1199421882629395,
      "learning_rate": 3.107476635514019e-05,
      "loss": 0.3963,
      "step": 210
    },
    {
      "epoch": 3.2116788321167884,
      "grad_norm": 4.893133163452148,
      "learning_rate": 2.9906542056074764e-05,
      "loss": 0.3358,
      "step": 220
    },
    {
      "epoch": 3.3576642335766422,
      "grad_norm": 4.033535957336426,
      "learning_rate": 2.873831775700935e-05,
      "loss": 0.3398,
      "step": 230
    },
    {
      "epoch": 3.5036496350364965,
      "grad_norm": 4.510129928588867,
      "learning_rate": 2.7570093457943924e-05,
      "loss": 0.3628,
      "step": 240
    },
    {
      "epoch": 3.6496350364963503,
      "grad_norm": 3.0796680450439453,
      "learning_rate": 2.6401869158878506e-05,
      "loss": 0.3171,
      "step": 250
    },
    {
      "epoch": 3.795620437956204,
      "grad_norm": 4.170633316040039,
      "learning_rate": 2.5233644859813084e-05,
      "loss": 0.3299,
      "step": 260
    },
    {
      "epoch": 3.9416058394160585,
      "grad_norm": 9.109073638916016,
      "learning_rate": 2.4065420560747666e-05,
      "loss": 0.3365,
      "step": 270
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8700435480174192,
      "eval_loss": 0.3332568407058716,
      "eval_runtime": 36.1676,
      "eval_samples_per_second": 120.633,
      "eval_steps_per_second": 1.908,
      "step": 274
    },
    {
      "epoch": 4.087591240875913,
      "grad_norm": 4.610809803009033,
      "learning_rate": 2.2897196261682244e-05,
      "loss": 0.3744,
      "step": 280
    },
    {
      "epoch": 4.233576642335766,
      "grad_norm": 4.210699558258057,
      "learning_rate": 2.1728971962616822e-05,
      "loss": 0.3022,
      "step": 290
    },
    {
      "epoch": 4.37956204379562,
      "grad_norm": 4.85805082321167,
      "learning_rate": 2.05607476635514e-05,
      "loss": 0.2934,
      "step": 300
    },
    {
      "epoch": 4.525547445255475,
      "grad_norm": 3.4125349521636963,
      "learning_rate": 1.9392523364485982e-05,
      "loss": 0.312,
      "step": 310
    },
    {
      "epoch": 4.671532846715328,
      "grad_norm": 4.196589946746826,
      "learning_rate": 1.822429906542056e-05,
      "loss": 0.2963,
      "step": 320
    },
    {
      "epoch": 4.817518248175182,
      "grad_norm": 3.81192684173584,
      "learning_rate": 1.705607476635514e-05,
      "loss": 0.3019,
      "step": 330
    },
    {
      "epoch": 4.963503649635037,
      "grad_norm": 3.5110998153686523,
      "learning_rate": 1.588785046728972e-05,
      "loss": 0.2976,
      "step": 340
    },
    {
      "epoch": 4.992700729927007,
      "eval_accuracy": 0.8837955535182214,
      "eval_loss": 0.30171430110931396,
      "eval_runtime": 35.5004,
      "eval_samples_per_second": 122.9,
      "eval_steps_per_second": 1.944,
      "step": 342
    },
    {
      "epoch": 5.109489051094891,
      "grad_norm": 6.99758768081665,
      "learning_rate": 1.4719626168224299e-05,
      "loss": 0.259,
      "step": 350
    },
    {
      "epoch": 5.255474452554744,
      "grad_norm": 4.643028736114502,
      "learning_rate": 1.3551401869158877e-05,
      "loss": 0.2668,
      "step": 360
    },
    {
      "epoch": 5.401459854014599,
      "grad_norm": 3.8736965656280518,
      "learning_rate": 1.2383177570093459e-05,
      "loss": 0.278,
      "step": 370
    },
    {
      "epoch": 5.547445255474453,
      "grad_norm": 5.259199142456055,
      "learning_rate": 1.1214953271028037e-05,
      "loss": 0.2677,
      "step": 380
    },
    {
      "epoch": 5.693430656934306,
      "grad_norm": 3.9390275478363037,
      "learning_rate": 1.0046728971962617e-05,
      "loss": 0.2664,
      "step": 390
    },
    {
      "epoch": 5.839416058394161,
      "grad_norm": 4.5700507164001465,
      "learning_rate": 8.878504672897196e-06,
      "loss": 0.2745,
      "step": 400
    },
    {
      "epoch": 5.985401459854015,
      "grad_norm": 3.543813705444336,
      "learning_rate": 7.710280373831776e-06,
      "loss": 0.2611,
      "step": 410
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8810451524180609,
      "eval_loss": 0.3118920624256134,
      "eval_runtime": 35.1336,
      "eval_samples_per_second": 124.183,
      "eval_steps_per_second": 1.964,
      "step": 411
    },
    {
      "epoch": 6.131386861313868,
      "grad_norm": 4.951327800750732,
      "learning_rate": 6.542056074766355e-06,
      "loss": 0.2515,
      "step": 420
    },
    {
      "epoch": 6.2773722627737225,
      "grad_norm": 9.216889381408691,
      "learning_rate": 5.373831775700935e-06,
      "loss": 0.2588,
      "step": 430
    },
    {
      "epoch": 6.423357664233577,
      "grad_norm": 4.5815606117248535,
      "learning_rate": 4.205607476635514e-06,
      "loss": 0.2531,
      "step": 440
    },
    {
      "epoch": 6.569343065693431,
      "grad_norm": 2.8593881130218506,
      "learning_rate": 3.0373831775700936e-06,
      "loss": 0.2453,
      "step": 450
    },
    {
      "epoch": 6.7153284671532845,
      "grad_norm": 5.8815598487854,
      "learning_rate": 1.8691588785046728e-06,
      "loss": 0.2746,
      "step": 460
    },
    {
      "epoch": 6.861313868613139,
      "grad_norm": 7.535807132720947,
      "learning_rate": 7.009345794392523e-07,
      "loss": 0.255,
      "step": 470
    },
    {
      "epoch": 6.948905109489051,
      "eval_accuracy": 0.8819619527847811,
      "eval_loss": 0.3084806203842163,
      "eval_runtime": 35.4691,
      "eval_samples_per_second": 123.008,
      "eval_steps_per_second": 1.945,
      "step": 476
    },
    {
      "epoch": 6.948905109489051,
      "step": 476,
      "total_flos": 9.517373864500433e+18,
      "train_loss": 0.3870567884765753,
      "train_runtime": 2171.3346,
      "train_samples_per_second": 56.33,
      "train_steps_per_second": 0.219
    }
  ],
  "logging_steps": 10,
  "max_steps": 476,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.517373864500433e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
|
|