{ "best_metric": 0.8548387096774194, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis-DA/checkpoint-256", "epoch": 38.51851851851852, "eval_steps": 500, "global_step": 520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7407407407407407, "grad_norm": 4.370074272155762, "learning_rate": 9.615384615384616e-06, "loss": 1.3402, "step": 10 }, { "epoch": 0.9629629629629629, "eval_accuracy": 0.5483870967741935, "eval_loss": 1.1682088375091553, "eval_runtime": 2.3908, "eval_samples_per_second": 25.932, "eval_steps_per_second": 0.837, "step": 13 }, { "epoch": 1.4814814814814814, "grad_norm": 7.86944580078125, "learning_rate": 1.923076923076923e-05, "loss": 1.1725, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.6290322580645161, "eval_loss": 1.0024793148040771, "eval_runtime": 3.324, "eval_samples_per_second": 18.652, "eval_steps_per_second": 0.602, "step": 27 }, { "epoch": 2.2222222222222223, "grad_norm": 4.94896936416626, "learning_rate": 2.8846153846153845e-05, "loss": 1.0671, "step": 30 }, { "epoch": 2.962962962962963, "grad_norm": 4.811951160430908, "learning_rate": 3.846153846153846e-05, "loss": 0.8824, "step": 40 }, { "epoch": 2.962962962962963, "eval_accuracy": 0.6612903225806451, "eval_loss": 0.7644360065460205, "eval_runtime": 3.388, "eval_samples_per_second": 18.3, "eval_steps_per_second": 0.59, "step": 40 }, { "epoch": 3.7037037037037037, "grad_norm": 7.974093914031982, "learning_rate": 4.8076923076923084e-05, "loss": 0.7342, "step": 50 }, { "epoch": 4.0, "eval_accuracy": 0.7258064516129032, "eval_loss": 0.5839676260948181, "eval_runtime": 3.0543, "eval_samples_per_second": 20.299, "eval_steps_per_second": 0.655, "step": 54 }, { "epoch": 4.444444444444445, "grad_norm": 8.472794532775879, "learning_rate": 4.9145299145299147e-05, "loss": 0.6734, "step": 60 }, { "epoch": 4.962962962962963, "eval_accuracy": 0.6451612903225806, "eval_loss": 0.6753666400909424, "eval_runtime": 2.3642, "eval_samples_per_second": 26.225, "eval_steps_per_second": 0.846, "step": 67 }, { "epoch": 5.185185185185185, "grad_norm": 9.15774917602539, "learning_rate": 4.8076923076923084e-05, "loss": 0.6373, "step": 70 }, { "epoch": 5.925925925925926, "grad_norm": 12.02450942993164, "learning_rate": 4.700854700854701e-05, "loss": 0.5167, "step": 80 }, { "epoch": 6.0, "eval_accuracy": 0.6935483870967742, "eval_loss": 0.5904402136802673, "eval_runtime": 2.3866, "eval_samples_per_second": 25.979, "eval_steps_per_second": 0.838, "step": 81 }, { "epoch": 6.666666666666667, "grad_norm": 7.348090648651123, "learning_rate": 4.594017094017094e-05, "loss": 0.5009, "step": 90 }, { "epoch": 6.962962962962963, "eval_accuracy": 0.6935483870967742, "eval_loss": 0.5549384355545044, "eval_runtime": 2.9982, "eval_samples_per_second": 20.679, "eval_steps_per_second": 0.667, "step": 94 }, { "epoch": 7.407407407407407, "grad_norm": 5.642479419708252, "learning_rate": 4.4871794871794874e-05, "loss": 0.4988, "step": 100 }, { "epoch": 8.0, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.620449423789978, "eval_runtime": 2.4283, "eval_samples_per_second": 25.532, "eval_steps_per_second": 0.824, "step": 108 }, { "epoch": 8.148148148148149, "grad_norm": 6.128896713256836, "learning_rate": 4.3803418803418805e-05, "loss": 0.4619, "step": 110 }, { "epoch": 8.88888888888889, "grad_norm": 7.555347919464111, "learning_rate": 4.2735042735042735e-05, "loss": 0.3856, "step": 120 }, { "epoch": 8.962962962962964, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.44631102681159973, "eval_runtime": 2.3506, "eval_samples_per_second": 26.376, "eval_steps_per_second": 0.851, "step": 121 }, { "epoch": 9.62962962962963, "grad_norm": 9.627432823181152, "learning_rate": 4.166666666666667e-05, "loss": 0.4057, "step": 130 }, { "epoch": 10.0, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.5231879353523254, "eval_runtime": 3.1544, "eval_samples_per_second": 19.655, "eval_steps_per_second": 0.634, "step": 135 }, { "epoch": 10.37037037037037, "grad_norm": 8.669109344482422, "learning_rate": 4.05982905982906e-05, "loss": 0.3929, "step": 140 }, { "epoch": 10.962962962962964, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.45801177620887756, "eval_runtime": 2.3878, "eval_samples_per_second": 25.965, "eval_steps_per_second": 0.838, "step": 148 }, { "epoch": 11.11111111111111, "grad_norm": 6.289756774902344, "learning_rate": 3.952991452991453e-05, "loss": 0.3673, "step": 150 }, { "epoch": 11.851851851851851, "grad_norm": 12.90579605102539, "learning_rate": 3.846153846153846e-05, "loss": 0.3638, "step": 160 }, { "epoch": 12.0, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.5114619135856628, "eval_runtime": 3.3569, "eval_samples_per_second": 18.47, "eval_steps_per_second": 0.596, "step": 162 }, { "epoch": 12.592592592592592, "grad_norm": 10.698553085327148, "learning_rate": 3.739316239316239e-05, "loss": 0.3248, "step": 170 }, { "epoch": 12.962962962962964, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.5312773585319519, "eval_runtime": 2.4335, "eval_samples_per_second": 25.478, "eval_steps_per_second": 0.822, "step": 175 }, { "epoch": 13.333333333333334, "grad_norm": 6.529489994049072, "learning_rate": 3.6324786324786323e-05, "loss": 0.2673, "step": 180 }, { "epoch": 14.0, "eval_accuracy": 0.7903225806451613, "eval_loss": 0.5203306674957275, "eval_runtime": 3.4828, "eval_samples_per_second": 17.802, "eval_steps_per_second": 0.574, "step": 189 }, { "epoch": 14.074074074074074, "grad_norm": 6.994911193847656, "learning_rate": 3.525641025641026e-05, "loss": 0.3216, "step": 190 }, { "epoch": 14.814814814814815, "grad_norm": 9.194233894348145, "learning_rate": 3.418803418803419e-05, "loss": 0.2922, "step": 200 }, { "epoch": 14.962962962962964, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.4315454959869385, "eval_runtime": 2.3822, "eval_samples_per_second": 26.026, "eval_steps_per_second": 0.84, "step": 202 }, { "epoch": 15.555555555555555, "grad_norm": 6.076256275177002, "learning_rate": 3.311965811965812e-05, "loss": 0.2803, "step": 210 }, { "epoch": 16.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.4577220380306244, "eval_runtime": 2.9439, "eval_samples_per_second": 21.06, "eval_steps_per_second": 0.679, "step": 216 }, { "epoch": 16.296296296296298, "grad_norm": 12.038761138916016, "learning_rate": 3.205128205128206e-05, "loss": 0.2735, "step": 220 }, { "epoch": 16.962962962962962, "eval_accuracy": 0.8064516129032258, "eval_loss": 0.5466907024383545, "eval_runtime": 2.3229, "eval_samples_per_second": 26.691, "eval_steps_per_second": 0.861, "step": 229 }, { "epoch": 17.037037037037038, "grad_norm": 8.897506713867188, "learning_rate": 3.098290598290599e-05, "loss": 0.2776, "step": 230 }, { "epoch": 17.77777777777778, "grad_norm": 9.66178035736084, "learning_rate": 2.9914529914529915e-05, "loss": 0.2586, "step": 240 }, { "epoch": 18.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.5236416459083557, "eval_runtime": 3.4253, "eval_samples_per_second": 18.101, "eval_steps_per_second": 0.584, "step": 243 }, { "epoch": 18.51851851851852, "grad_norm": 7.729655742645264, "learning_rate": 2.8846153846153845e-05, "loss": 0.2366, "step": 250 }, { "epoch": 18.962962962962962, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.5075119137763977, "eval_runtime": 2.3943, "eval_samples_per_second": 25.895, "eval_steps_per_second": 0.835, "step": 256 }, { "epoch": 19.25925925925926, "grad_norm": 11.543585777282715, "learning_rate": 2.777777777777778e-05, "loss": 0.252, "step": 260 }, { "epoch": 20.0, "grad_norm": 7.877120494842529, "learning_rate": 2.670940170940171e-05, "loss": 0.2347, "step": 270 }, { "epoch": 20.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.5178562998771667, "eval_runtime": 2.4124, "eval_samples_per_second": 25.701, "eval_steps_per_second": 0.829, "step": 270 }, { "epoch": 20.74074074074074, "grad_norm": 7.83768892288208, "learning_rate": 2.564102564102564e-05, "loss": 0.2046, "step": 280 }, { "epoch": 20.962962962962962, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.5427502393722534, "eval_runtime": 3.4728, "eval_samples_per_second": 17.853, "eval_steps_per_second": 0.576, "step": 283 }, { "epoch": 21.48148148148148, "grad_norm": 7.919957637786865, "learning_rate": 2.4572649572649573e-05, "loss": 0.2289, "step": 290 }, { "epoch": 22.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.57480788230896, "eval_runtime": 2.4021, "eval_samples_per_second": 25.811, "eval_steps_per_second": 0.833, "step": 297 }, { "epoch": 22.22222222222222, "grad_norm": 8.665252685546875, "learning_rate": 2.3504273504273504e-05, "loss": 0.2394, "step": 300 }, { "epoch": 22.962962962962962, "grad_norm": 7.902819633483887, "learning_rate": 2.2435897435897437e-05, "loss": 0.2195, "step": 310 }, { "epoch": 22.962962962962962, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.5968937277793884, "eval_runtime": 3.4133, "eval_samples_per_second": 18.164, "eval_steps_per_second": 0.586, "step": 310 }, { "epoch": 23.703703703703702, "grad_norm": 9.844597816467285, "learning_rate": 2.1367521367521368e-05, "loss": 0.2224, "step": 320 }, { "epoch": 24.0, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.6092303991317749, "eval_runtime": 2.3949, "eval_samples_per_second": 25.888, "eval_steps_per_second": 0.835, "step": 324 }, { "epoch": 24.444444444444443, "grad_norm": 6.439063549041748, "learning_rate": 2.02991452991453e-05, "loss": 0.2167, "step": 330 }, { "epoch": 24.962962962962962, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.6333113312721252, "eval_runtime": 2.4482, "eval_samples_per_second": 25.325, "eval_steps_per_second": 0.817, "step": 337 }, { "epoch": 25.185185185185187, "grad_norm": 8.865224838256836, "learning_rate": 1.923076923076923e-05, "loss": 0.2323, "step": 340 }, { "epoch": 25.925925925925927, "grad_norm": 6.462991237640381, "learning_rate": 1.8162393162393162e-05, "loss": 0.1956, "step": 350 }, { "epoch": 26.0, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.5993022322654724, "eval_runtime": 2.3358, "eval_samples_per_second": 26.543, "eval_steps_per_second": 0.856, "step": 351 }, { "epoch": 26.666666666666668, "grad_norm": 6.978143692016602, "learning_rate": 1.7094017094017095e-05, "loss": 0.2174, "step": 360 }, { "epoch": 26.962962962962962, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.6063364744186401, "eval_runtime": 2.3579, "eval_samples_per_second": 26.295, "eval_steps_per_second": 0.848, "step": 364 }, { "epoch": 27.40740740740741, "grad_norm": 8.283989906311035, "learning_rate": 1.602564102564103e-05, "loss": 0.1999, "step": 370 }, { "epoch": 28.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6413679718971252, "eval_runtime": 3.4435, "eval_samples_per_second": 18.005, "eval_steps_per_second": 0.581, "step": 378 }, { "epoch": 28.14814814814815, "grad_norm": 5.77383279800415, "learning_rate": 1.4957264957264958e-05, "loss": 0.1783, "step": 380 }, { "epoch": 28.88888888888889, "grad_norm": 7.4615654945373535, "learning_rate": 1.388888888888889e-05, "loss": 0.1667, "step": 390 }, { "epoch": 28.962962962962962, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6296666860580444, "eval_runtime": 2.3485, "eval_samples_per_second": 26.4, "eval_steps_per_second": 0.852, "step": 391 }, { "epoch": 29.62962962962963, "grad_norm": 9.373270034790039, "learning_rate": 1.282051282051282e-05, "loss": 0.1835, "step": 400 }, { "epoch": 30.0, "eval_accuracy": 0.8225806451612904, "eval_loss": 0.6148854494094849, "eval_runtime": 3.1829, "eval_samples_per_second": 19.479, "eval_steps_per_second": 0.628, "step": 405 }, { "epoch": 30.37037037037037, "grad_norm": 8.87562370300293, "learning_rate": 1.1752136752136752e-05, "loss": 0.186, "step": 410 }, { "epoch": 30.962962962962962, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6429581642150879, "eval_runtime": 2.4503, "eval_samples_per_second": 25.303, "eval_steps_per_second": 0.816, "step": 418 }, { "epoch": 31.11111111111111, "grad_norm": 5.281705856323242, "learning_rate": 1.0683760683760684e-05, "loss": 0.1706, "step": 420 }, { "epoch": 31.85185185185185, "grad_norm": 4.753020286560059, "learning_rate": 9.615384615384616e-06, "loss": 0.1749, "step": 430 }, { "epoch": 32.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6677759885787964, "eval_runtime": 2.3885, "eval_samples_per_second": 25.957, "eval_steps_per_second": 0.837, "step": 432 }, { "epoch": 32.592592592592595, "grad_norm": 7.2512526512146, "learning_rate": 8.547008547008548e-06, "loss": 0.1663, "step": 440 }, { "epoch": 32.96296296296296, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6828835010528564, "eval_runtime": 2.3483, "eval_samples_per_second": 26.402, "eval_steps_per_second": 0.852, "step": 445 }, { "epoch": 33.333333333333336, "grad_norm": 9.678658485412598, "learning_rate": 7.478632478632479e-06, "loss": 0.1557, "step": 450 }, { "epoch": 34.0, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.655702531337738, "eval_runtime": 3.1084, "eval_samples_per_second": 19.946, "eval_steps_per_second": 0.643, "step": 459 }, { "epoch": 34.074074074074076, "grad_norm": 5.886323928833008, "learning_rate": 6.41025641025641e-06, "loss": 0.2095, "step": 460 }, { "epoch": 34.81481481481482, "grad_norm": 5.312963485717773, "learning_rate": 5.341880341880342e-06, "loss": 0.1913, "step": 470 }, { "epoch": 34.96296296296296, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6274862885475159, "eval_runtime": 3.2878, "eval_samples_per_second": 18.858, "eval_steps_per_second": 0.608, "step": 472 }, { "epoch": 35.55555555555556, "grad_norm": 7.064798355102539, "learning_rate": 4.273504273504274e-06, "loss": 0.1775, "step": 480 }, { "epoch": 36.0, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.6554756760597229, "eval_runtime": 2.4759, "eval_samples_per_second": 25.041, "eval_steps_per_second": 0.808, "step": 486 }, { "epoch": 36.2962962962963, "grad_norm": 5.463845729827881, "learning_rate": 3.205128205128205e-06, "loss": 0.152, "step": 490 }, { "epoch": 36.96296296296296, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.6653042435646057, "eval_runtime": 3.3751, "eval_samples_per_second": 18.37, "eval_steps_per_second": 0.593, "step": 499 }, { "epoch": 37.03703703703704, "grad_norm": 5.512512683868408, "learning_rate": 2.136752136752137e-06, "loss": 0.1681, "step": 500 }, { "epoch": 37.77777777777778, "grad_norm": 6.535687446594238, "learning_rate": 1.0683760683760685e-06, "loss": 0.1897, "step": 510 }, { "epoch": 38.0, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.6681959629058838, "eval_runtime": 2.3421, "eval_samples_per_second": 26.472, "eval_steps_per_second": 0.854, "step": 513 }, { "epoch": 38.51851851851852, "grad_norm": 8.290581703186035, "learning_rate": 0.0, "loss": 0.1589, "step": 520 }, { "epoch": 38.51851851851852, "eval_accuracy": 0.8548387096774194, "eval_loss": 0.6678970456123352, "eval_runtime": 2.3455, "eval_samples_per_second": 26.434, "eval_steps_per_second": 0.853, "step": 520 }, { "epoch": 38.51851851851852, "step": 520, "total_flos": 2.140878196703232e+18, "train_loss": 0.35049390150950505, "train_runtime": 3356.7171, "train_samples_per_second": 20.353, "train_steps_per_second": 0.155 } ], "logging_steps": 10, "max_steps": 520, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.140878196703232e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }