{ "best_metric": 0.8837955535182214, "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-MM_Classification_base_web_images/checkpoint-342", "epoch": 6.948905109489051, "eval_steps": 500, "global_step": 476, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.145985401459854, "grad_norm": 3.675107717514038, "learning_rate": 1.0416666666666668e-05, "loss": 1.0612, "step": 10 }, { "epoch": 0.291970802919708, "grad_norm": 3.70519757270813, "learning_rate": 2.0833333333333336e-05, "loss": 0.8853, "step": 20 }, { "epoch": 0.43795620437956206, "grad_norm": 3.8755295276641846, "learning_rate": 3.125e-05, "loss": 0.7099, "step": 30 }, { "epoch": 0.583941605839416, "grad_norm": 2.727118492126465, "learning_rate": 4.166666666666667e-05, "loss": 0.6121, "step": 40 }, { "epoch": 0.7299270072992701, "grad_norm": 3.260768175125122, "learning_rate": 4.976635514018692e-05, "loss": 0.5736, "step": 50 }, { "epoch": 0.8759124087591241, "grad_norm": 5.847088813781738, "learning_rate": 4.85981308411215e-05, "loss": 0.517, "step": 60 }, { "epoch": 0.9927007299270073, "eval_accuracy": 0.8157231262892505, "eval_loss": 0.4430324137210846, "eval_runtime": 35.8383, "eval_samples_per_second": 121.741, "eval_steps_per_second": 1.925, "step": 68 }, { "epoch": 1.0218978102189782, "grad_norm": 5.806203365325928, "learning_rate": 4.742990654205608e-05, "loss": 0.4972, "step": 70 }, { "epoch": 1.167883211678832, "grad_norm": 2.751803159713745, "learning_rate": 4.6261682242990654e-05, "loss": 0.4767, "step": 80 }, { "epoch": 1.313868613138686, "grad_norm": 2.5074498653411865, "learning_rate": 4.5093457943925236e-05, "loss": 0.4509, "step": 90 }, { "epoch": 1.4598540145985401, "grad_norm": 3.3779966831207275, "learning_rate": 4.392523364485982e-05, "loss": 0.4496, "step": 100 }, { "epoch": 1.6058394160583942, "grad_norm": 4.8917741775512695, "learning_rate": 4.27570093457944e-05, "loss": 0.4528, "step": 110 }, { "epoch": 1.7518248175182483, "grad_norm": 4.1523966789245605, "learning_rate": 4.1588785046728974e-05, "loss": 0.4535, "step": 120 }, { "epoch": 1.897810218978102, "grad_norm": 6.303163051605225, "learning_rate": 4.0420560747663556e-05, "loss": 0.4211, "step": 130 }, { "epoch": 2.0, "eval_accuracy": 0.8457483382993353, "eval_loss": 0.38001278042793274, "eval_runtime": 36.8858, "eval_samples_per_second": 118.284, "eval_steps_per_second": 1.871, "step": 137 }, { "epoch": 2.0437956204379564, "grad_norm": 6.562156677246094, "learning_rate": 3.925233644859813e-05, "loss": 0.4435, "step": 140 }, { "epoch": 2.18978102189781, "grad_norm": 3.601300001144409, "learning_rate": 3.808411214953271e-05, "loss": 0.3971, "step": 150 }, { "epoch": 2.335766423357664, "grad_norm": 4.384274005889893, "learning_rate": 3.691588785046729e-05, "loss": 0.4075, "step": 160 }, { "epoch": 2.4817518248175183, "grad_norm": 3.9660568237304688, "learning_rate": 3.574766355140187e-05, "loss": 0.3788, "step": 170 }, { "epoch": 2.627737226277372, "grad_norm": 4.195096492767334, "learning_rate": 3.457943925233645e-05, "loss": 0.376, "step": 180 }, { "epoch": 2.7737226277372264, "grad_norm": 3.6591451168060303, "learning_rate": 3.341121495327103e-05, "loss": 0.3514, "step": 190 }, { "epoch": 2.9197080291970803, "grad_norm": 3.5800323486328125, "learning_rate": 3.224299065420561e-05, "loss": 0.3532, "step": 200 }, { "epoch": 2.9927007299270074, "eval_accuracy": 0.8615631446252578, "eval_loss": 0.3563055098056793, "eval_runtime": 36.2136, "eval_samples_per_second": 120.48, "eval_steps_per_second": 1.905, "step": 205 }, { "epoch": 3.065693430656934, "grad_norm": 6.1199421882629395, "learning_rate": 3.107476635514019e-05, "loss": 0.3963, "step": 210 }, { "epoch": 3.2116788321167884, "grad_norm": 4.893133163452148, "learning_rate": 2.9906542056074764e-05, "loss": 0.3358, "step": 220 }, { "epoch": 3.3576642335766422, "grad_norm": 4.033535957336426, "learning_rate": 2.873831775700935e-05, "loss": 0.3398, "step": 230 }, { "epoch": 3.5036496350364965, "grad_norm": 4.510129928588867, "learning_rate": 2.7570093457943924e-05, "loss": 0.3628, "step": 240 }, { "epoch": 3.6496350364963503, "grad_norm": 3.0796680450439453, "learning_rate": 2.6401869158878506e-05, "loss": 0.3171, "step": 250 }, { "epoch": 3.795620437956204, "grad_norm": 4.170633316040039, "learning_rate": 2.5233644859813084e-05, "loss": 0.3299, "step": 260 }, { "epoch": 3.9416058394160585, "grad_norm": 9.109073638916016, "learning_rate": 2.4065420560747666e-05, "loss": 0.3365, "step": 270 }, { "epoch": 4.0, "eval_accuracy": 0.8700435480174192, "eval_loss": 0.3332568407058716, "eval_runtime": 36.1676, "eval_samples_per_second": 120.633, "eval_steps_per_second": 1.908, "step": 274 }, { "epoch": 4.087591240875913, "grad_norm": 4.610809803009033, "learning_rate": 2.2897196261682244e-05, "loss": 0.3744, "step": 280 }, { "epoch": 4.233576642335766, "grad_norm": 4.210699558258057, "learning_rate": 2.1728971962616822e-05, "loss": 0.3022, "step": 290 }, { "epoch": 4.37956204379562, "grad_norm": 4.85805082321167, "learning_rate": 2.05607476635514e-05, "loss": 0.2934, "step": 300 }, { "epoch": 4.525547445255475, "grad_norm": 3.4125349521636963, "learning_rate": 1.9392523364485982e-05, "loss": 0.312, "step": 310 }, { "epoch": 4.671532846715328, "grad_norm": 4.196589946746826, "learning_rate": 1.822429906542056e-05, "loss": 0.2963, "step": 320 }, { "epoch": 4.817518248175182, "grad_norm": 3.81192684173584, "learning_rate": 1.705607476635514e-05, "loss": 0.3019, "step": 330 }, { "epoch": 4.963503649635037, "grad_norm": 3.5110998153686523, "learning_rate": 1.588785046728972e-05, "loss": 0.2976, "step": 340 }, { "epoch": 4.992700729927007, "eval_accuracy": 0.8837955535182214, "eval_loss": 0.30171430110931396, "eval_runtime": 35.5004, "eval_samples_per_second": 122.9, "eval_steps_per_second": 1.944, "step": 342 }, { "epoch": 5.109489051094891, "grad_norm": 6.99758768081665, "learning_rate": 1.4719626168224299e-05, "loss": 0.259, "step": 350 }, { "epoch": 5.255474452554744, "grad_norm": 4.643028736114502, "learning_rate": 1.3551401869158877e-05, "loss": 0.2668, "step": 360 }, { "epoch": 5.401459854014599, "grad_norm": 3.8736965656280518, "learning_rate": 1.2383177570093459e-05, "loss": 0.278, "step": 370 }, { "epoch": 5.547445255474453, "grad_norm": 5.259199142456055, "learning_rate": 1.1214953271028037e-05, "loss": 0.2677, "step": 380 }, { "epoch": 5.693430656934306, "grad_norm": 3.9390275478363037, "learning_rate": 1.0046728971962617e-05, "loss": 0.2664, "step": 390 }, { "epoch": 5.839416058394161, "grad_norm": 4.5700507164001465, "learning_rate": 8.878504672897196e-06, "loss": 0.2745, "step": 400 }, { "epoch": 5.985401459854015, "grad_norm": 3.543813705444336, "learning_rate": 7.710280373831776e-06, "loss": 0.2611, "step": 410 }, { "epoch": 6.0, "eval_accuracy": 0.8810451524180609, "eval_loss": 0.3118920624256134, "eval_runtime": 35.1336, "eval_samples_per_second": 124.183, "eval_steps_per_second": 1.964, "step": 411 }, { "epoch": 6.131386861313868, "grad_norm": 4.951327800750732, "learning_rate": 6.542056074766355e-06, "loss": 0.2515, "step": 420 }, { "epoch": 6.2773722627737225, "grad_norm": 9.216889381408691, "learning_rate": 5.373831775700935e-06, "loss": 0.2588, "step": 430 }, { "epoch": 6.423357664233577, "grad_norm": 4.5815606117248535, "learning_rate": 4.205607476635514e-06, "loss": 0.2531, "step": 440 }, { "epoch": 6.569343065693431, "grad_norm": 2.8593881130218506, "learning_rate": 3.0373831775700936e-06, "loss": 0.2453, "step": 450 }, { "epoch": 6.7153284671532845, "grad_norm": 5.8815598487854, "learning_rate": 1.8691588785046728e-06, "loss": 0.2746, "step": 460 }, { "epoch": 6.861313868613139, "grad_norm": 7.535807132720947, "learning_rate": 7.009345794392523e-07, "loss": 0.255, "step": 470 }, { "epoch": 6.948905109489051, "eval_accuracy": 0.8819619527847811, "eval_loss": 0.3084806203842163, "eval_runtime": 35.4691, "eval_samples_per_second": 123.008, "eval_steps_per_second": 1.945, "step": 476 }, { "epoch": 6.948905109489051, "step": 476, "total_flos": 9.517373864500433e+18, "train_loss": 0.3870567884765753, "train_runtime": 2171.3346, "train_samples_per_second": 56.33, "train_steps_per_second": 0.219 } ], "logging_steps": 10, "max_steps": 476, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.517373864500433e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }