{ "best_metric": 0.8672727272727273, "best_model_checkpoint": "beit-base-patch16-224-RD\\checkpoint-764", "epoch": 39.75155279503105, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 3.125e-06, "loss": 1.6079, "step": 10 }, { "epoch": 0.5, "learning_rate": 6.25e-06, "loss": 1.594, "step": 20 }, { "epoch": 0.75, "learning_rate": 9.375000000000001e-06, "loss": 1.5602, "step": 30 }, { "epoch": 0.99, "learning_rate": 1.25e-05, "loss": 1.4986, "step": 40 }, { "epoch": 0.99, "eval_accuracy": 0.49454545454545457, "eval_loss": 1.4512125253677368, "eval_runtime": 3.6819, "eval_samples_per_second": 149.381, "eval_steps_per_second": 9.506, "step": 40 }, { "epoch": 1.24, "learning_rate": 1.5625e-05, "loss": 1.4191, "step": 50 }, { "epoch": 1.49, "learning_rate": 1.8750000000000002e-05, "loss": 1.3138, "step": 60 }, { "epoch": 1.74, "learning_rate": 2.1875e-05, "loss": 1.1635, "step": 70 }, { "epoch": 1.99, "learning_rate": 2.5e-05, "loss": 1.0553, "step": 80 }, { "epoch": 1.99, "eval_accuracy": 0.7472727272727273, "eval_loss": 0.9355355501174927, "eval_runtime": 3.5838, "eval_samples_per_second": 153.468, "eval_steps_per_second": 9.766, "step": 80 }, { "epoch": 2.24, "learning_rate": 2.8125000000000003e-05, "loss": 0.9336, "step": 90 }, { "epoch": 2.48, "learning_rate": 3.125e-05, "loss": 0.8113, "step": 100 }, { "epoch": 2.73, "learning_rate": 3.4375e-05, "loss": 0.771, "step": 110 }, { "epoch": 2.98, "learning_rate": 3.7500000000000003e-05, "loss": 0.7972, "step": 120 }, { "epoch": 2.98, "eval_accuracy": 0.7436363636363637, "eval_loss": 0.7250072956085205, "eval_runtime": 3.805, "eval_samples_per_second": 144.547, "eval_steps_per_second": 9.198, "step": 120 }, { "epoch": 3.23, "learning_rate": 4.0625000000000005e-05, "loss": 0.7561, "step": 130 }, { "epoch": 3.48, "learning_rate": 4.375e-05, "loss": 0.7764, "step": 140 }, { "epoch": 3.73, "learning_rate": 4.6875e-05, "loss": 0.7277, "step": 150 }, { "epoch": 3.98, "learning_rate": 5e-05, "loss": 0.7156, "step": 160 }, { "epoch": 4.0, "eval_accuracy": 0.7581818181818182, "eval_loss": 0.5845356583595276, "eval_runtime": 3.7344, "eval_samples_per_second": 147.279, "eval_steps_per_second": 9.372, "step": 161 }, { "epoch": 4.22, "learning_rate": 4.965277777777778e-05, "loss": 0.7195, "step": 170 }, { "epoch": 4.47, "learning_rate": 4.930555555555556e-05, "loss": 0.6915, "step": 180 }, { "epoch": 4.72, "learning_rate": 4.8958333333333335e-05, "loss": 0.6135, "step": 190 }, { "epoch": 4.97, "learning_rate": 4.8611111111111115e-05, "loss": 0.6723, "step": 200 }, { "epoch": 4.99, "eval_accuracy": 0.8036363636363636, "eval_loss": 0.5509195327758789, "eval_runtime": 3.7029, "eval_samples_per_second": 148.533, "eval_steps_per_second": 9.452, "step": 201 }, { "epoch": 5.22, "learning_rate": 4.8263888888888895e-05, "loss": 0.6887, "step": 210 }, { "epoch": 5.47, "learning_rate": 4.791666666666667e-05, "loss": 0.684, "step": 220 }, { "epoch": 5.71, "learning_rate": 4.756944444444444e-05, "loss": 0.6179, "step": 230 }, { "epoch": 5.96, "learning_rate": 4.722222222222222e-05, "loss": 0.5942, "step": 240 }, { "epoch": 5.99, "eval_accuracy": 0.8218181818181818, "eval_loss": 0.5018081068992615, "eval_runtime": 3.6674, "eval_samples_per_second": 149.971, "eval_steps_per_second": 9.544, "step": 241 }, { "epoch": 6.21, "learning_rate": 4.6875e-05, "loss": 0.5778, "step": 250 }, { "epoch": 6.46, "learning_rate": 4.652777777777778e-05, "loss": 0.5984, "step": 260 }, { "epoch": 6.71, "learning_rate": 4.618055555555556e-05, "loss": 0.6025, "step": 270 }, { "epoch": 6.96, "learning_rate": 4.5833333333333334e-05, "loss": 0.6223, "step": 280 }, { "epoch": 6.98, "eval_accuracy": 0.8218181818181818, "eval_loss": 0.49931806325912476, "eval_runtime": 3.5744, "eval_samples_per_second": 153.874, "eval_steps_per_second": 9.792, "step": 281 }, { "epoch": 7.2, "learning_rate": 4.5486111111111114e-05, "loss": 0.5995, "step": 290 }, { "epoch": 7.45, "learning_rate": 4.5138888888888894e-05, "loss": 0.5855, "step": 300 }, { "epoch": 7.7, "learning_rate": 4.4791666666666673e-05, "loss": 0.5888, "step": 310 }, { "epoch": 7.95, "learning_rate": 4.4444444444444447e-05, "loss": 0.5731, "step": 320 }, { "epoch": 8.0, "eval_accuracy": 0.8290909090909091, "eval_loss": 0.4590268135070801, "eval_runtime": 3.5704, "eval_samples_per_second": 154.046, "eval_steps_per_second": 9.803, "step": 322 }, { "epoch": 8.2, "learning_rate": 4.4097222222222226e-05, "loss": 0.5685, "step": 330 }, { "epoch": 8.45, "learning_rate": 4.375e-05, "loss": 0.614, "step": 340 }, { "epoch": 8.7, "learning_rate": 4.340277777777778e-05, "loss": 0.5958, "step": 350 }, { "epoch": 8.94, "learning_rate": 4.305555555555556e-05, "loss": 0.5583, "step": 360 }, { "epoch": 8.99, "eval_accuracy": 0.8, "eval_loss": 0.4877796471118927, "eval_runtime": 3.6449, "eval_samples_per_second": 150.896, "eval_steps_per_second": 9.603, "step": 362 }, { "epoch": 9.19, "learning_rate": 4.270833333333333e-05, "loss": 0.5185, "step": 370 }, { "epoch": 9.44, "learning_rate": 4.236111111111111e-05, "loss": 0.5777, "step": 380 }, { "epoch": 9.69, "learning_rate": 4.201388888888889e-05, "loss": 0.572, "step": 390 }, { "epoch": 9.94, "learning_rate": 4.166666666666667e-05, "loss": 0.5784, "step": 400 }, { "epoch": 9.99, "eval_accuracy": 0.8454545454545455, "eval_loss": 0.4484546482563019, "eval_runtime": 3.6484, "eval_samples_per_second": 150.753, "eval_steps_per_second": 9.593, "step": 402 }, { "epoch": 10.19, "learning_rate": 4.1319444444444445e-05, "loss": 0.5207, "step": 410 }, { "epoch": 10.43, "learning_rate": 4.0972222222222225e-05, "loss": 0.4891, "step": 420 }, { "epoch": 10.68, "learning_rate": 4.0625000000000005e-05, "loss": 0.568, "step": 430 }, { "epoch": 10.93, "learning_rate": 4.027777777777778e-05, "loss": 0.4968, "step": 440 }, { "epoch": 10.98, "eval_accuracy": 0.8345454545454546, "eval_loss": 0.4305056631565094, "eval_runtime": 3.6569, "eval_samples_per_second": 150.401, "eval_steps_per_second": 9.571, "step": 442 }, { "epoch": 11.18, "learning_rate": 3.993055555555556e-05, "loss": 0.4998, "step": 450 }, { "epoch": 11.43, "learning_rate": 3.958333333333333e-05, "loss": 0.4738, "step": 460 }, { "epoch": 11.68, "learning_rate": 3.923611111111111e-05, "loss": 0.5471, "step": 470 }, { "epoch": 11.93, "learning_rate": 3.888888888888889e-05, "loss": 0.5324, "step": 480 }, { "epoch": 12.0, "eval_accuracy": 0.8345454545454546, "eval_loss": 0.47365644574165344, "eval_runtime": 3.8879, "eval_samples_per_second": 141.464, "eval_steps_per_second": 9.002, "step": 483 }, { "epoch": 12.17, "learning_rate": 3.854166666666667e-05, "loss": 0.5133, "step": 490 }, { "epoch": 12.42, "learning_rate": 3.8194444444444444e-05, "loss": 0.486, "step": 500 }, { "epoch": 12.67, "learning_rate": 3.7847222222222224e-05, "loss": 0.4803, "step": 510 }, { "epoch": 12.92, "learning_rate": 3.7500000000000003e-05, "loss": 0.4629, "step": 520 }, { "epoch": 12.99, "eval_accuracy": 0.8436363636363636, "eval_loss": 0.4253169000148773, "eval_runtime": 3.7694, "eval_samples_per_second": 145.912, "eval_steps_per_second": 9.285, "step": 523 }, { "epoch": 13.17, "learning_rate": 3.715277777777778e-05, "loss": 0.5659, "step": 530 }, { "epoch": 13.42, "learning_rate": 3.6805555555555556e-05, "loss": 0.4986, "step": 540 }, { "epoch": 13.66, "learning_rate": 3.6458333333333336e-05, "loss": 0.4903, "step": 550 }, { "epoch": 13.91, "learning_rate": 3.611111111111111e-05, "loss": 0.4398, "step": 560 }, { "epoch": 13.99, "eval_accuracy": 0.8472727272727273, "eval_loss": 0.41836607456207275, "eval_runtime": 3.9425, "eval_samples_per_second": 139.507, "eval_steps_per_second": 8.878, "step": 563 }, { "epoch": 14.16, "learning_rate": 3.576388888888889e-05, "loss": 0.4112, "step": 570 }, { "epoch": 14.41, "learning_rate": 3.541666666666667e-05, "loss": 0.4664, "step": 580 }, { "epoch": 14.66, "learning_rate": 3.506944444444444e-05, "loss": 0.4816, "step": 590 }, { "epoch": 14.91, "learning_rate": 3.472222222222222e-05, "loss": 0.4575, "step": 600 }, { "epoch": 14.98, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.39294329285621643, "eval_runtime": 3.6178, "eval_samples_per_second": 152.024, "eval_steps_per_second": 9.674, "step": 603 }, { "epoch": 15.16, "learning_rate": 3.4375e-05, "loss": 0.4644, "step": 610 }, { "epoch": 15.4, "learning_rate": 3.402777777777778e-05, "loss": 0.4587, "step": 620 }, { "epoch": 15.65, "learning_rate": 3.368055555555556e-05, "loss": 0.4597, "step": 630 }, { "epoch": 15.9, "learning_rate": 3.3333333333333335e-05, "loss": 0.4554, "step": 640 }, { "epoch": 16.0, "eval_accuracy": 0.8490909090909091, "eval_loss": 0.4281716048717499, "eval_runtime": 3.5593, "eval_samples_per_second": 154.523, "eval_steps_per_second": 9.833, "step": 644 }, { "epoch": 16.15, "learning_rate": 3.2986111111111115e-05, "loss": 0.4964, "step": 650 }, { "epoch": 16.4, "learning_rate": 3.263888888888889e-05, "loss": 0.3862, "step": 660 }, { "epoch": 16.65, "learning_rate": 3.229166666666667e-05, "loss": 0.437, "step": 670 }, { "epoch": 16.89, "learning_rate": 3.194444444444444e-05, "loss": 0.4646, "step": 680 }, { "epoch": 16.99, "eval_accuracy": 0.8236363636363636, "eval_loss": 0.4363492727279663, "eval_runtime": 3.5633, "eval_samples_per_second": 154.349, "eval_steps_per_second": 9.822, "step": 684 }, { "epoch": 17.14, "learning_rate": 3.159722222222222e-05, "loss": 0.5133, "step": 690 }, { "epoch": 17.39, "learning_rate": 3.125e-05, "loss": 0.4389, "step": 700 }, { "epoch": 17.64, "learning_rate": 3.090277777777778e-05, "loss": 0.4114, "step": 710 }, { "epoch": 17.89, "learning_rate": 3.055555555555556e-05, "loss": 0.4535, "step": 720 }, { "epoch": 17.99, "eval_accuracy": 0.8454545454545455, "eval_loss": 0.43369540572166443, "eval_runtime": 3.6178, "eval_samples_per_second": 152.024, "eval_steps_per_second": 9.674, "step": 724 }, { "epoch": 18.14, "learning_rate": 3.0208333333333334e-05, "loss": 0.4653, "step": 730 }, { "epoch": 18.39, "learning_rate": 2.9861111111111113e-05, "loss": 0.4407, "step": 740 }, { "epoch": 18.63, "learning_rate": 2.951388888888889e-05, "loss": 0.4223, "step": 750 }, { "epoch": 18.88, "learning_rate": 2.916666666666667e-05, "loss": 0.3823, "step": 760 }, { "epoch": 18.98, "eval_accuracy": 0.8672727272727273, "eval_loss": 0.37708351016044617, "eval_runtime": 3.6744, "eval_samples_per_second": 149.685, "eval_steps_per_second": 9.525, "step": 764 }, { "epoch": 19.13, "learning_rate": 2.8819444444444443e-05, "loss": 0.3769, "step": 770 }, { "epoch": 19.38, "learning_rate": 2.8472222222222223e-05, "loss": 0.3677, "step": 780 }, { "epoch": 19.63, "learning_rate": 2.8125000000000003e-05, "loss": 0.4063, "step": 790 }, { "epoch": 19.88, "learning_rate": 2.777777777777778e-05, "loss": 0.4584, "step": 800 }, { "epoch": 20.0, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.39663317799568176, "eval_runtime": 3.6579, "eval_samples_per_second": 150.36, "eval_steps_per_second": 9.568, "step": 805 }, { "epoch": 20.12, "learning_rate": 2.743055555555556e-05, "loss": 0.4067, "step": 810 }, { "epoch": 20.37, "learning_rate": 2.7083333333333332e-05, "loss": 0.4594, "step": 820 }, { "epoch": 20.62, "learning_rate": 2.6736111111111112e-05, "loss": 0.4006, "step": 830 }, { "epoch": 20.87, "learning_rate": 2.6388888888888892e-05, "loss": 0.4103, "step": 840 }, { "epoch": 20.99, "eval_accuracy": 0.8490909090909091, "eval_loss": 0.4001007080078125, "eval_runtime": 3.5848, "eval_samples_per_second": 153.424, "eval_steps_per_second": 9.763, "step": 845 }, { "epoch": 21.12, "learning_rate": 2.604166666666667e-05, "loss": 0.3902, "step": 850 }, { "epoch": 21.37, "learning_rate": 2.5694444444444445e-05, "loss": 0.3294, "step": 860 }, { "epoch": 21.61, "learning_rate": 2.534722222222222e-05, "loss": 0.3812, "step": 870 }, { "epoch": 21.86, "learning_rate": 2.5e-05, "loss": 0.3659, "step": 880 }, { "epoch": 21.99, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.3947709798812866, "eval_runtime": 3.6099, "eval_samples_per_second": 152.36, "eval_steps_per_second": 9.696, "step": 885 }, { "epoch": 22.11, "learning_rate": 2.465277777777778e-05, "loss": 0.3471, "step": 890 }, { "epoch": 22.36, "learning_rate": 2.4305555555555558e-05, "loss": 0.3868, "step": 900 }, { "epoch": 22.61, "learning_rate": 2.3958333333333334e-05, "loss": 0.3871, "step": 910 }, { "epoch": 22.86, "learning_rate": 2.361111111111111e-05, "loss": 0.3241, "step": 920 }, { "epoch": 22.98, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.40068891644477844, "eval_runtime": 3.5493, "eval_samples_per_second": 154.959, "eval_steps_per_second": 9.861, "step": 925 }, { "epoch": 23.11, "learning_rate": 2.326388888888889e-05, "loss": 0.3288, "step": 930 }, { "epoch": 23.35, "learning_rate": 2.2916666666666667e-05, "loss": 0.3785, "step": 940 }, { "epoch": 23.6, "learning_rate": 2.2569444444444447e-05, "loss": 0.3412, "step": 950 }, { "epoch": 23.85, "learning_rate": 2.2222222222222223e-05, "loss": 0.3575, "step": 960 }, { "epoch": 24.0, "eval_accuracy": 0.8327272727272728, "eval_loss": 0.43282008171081543, "eval_runtime": 3.7309, "eval_samples_per_second": 147.417, "eval_steps_per_second": 9.381, "step": 966 }, { "epoch": 24.1, "learning_rate": 2.1875e-05, "loss": 0.3713, "step": 970 }, { "epoch": 24.35, "learning_rate": 2.152777777777778e-05, "loss": 0.3191, "step": 980 }, { "epoch": 24.6, "learning_rate": 2.1180555555555556e-05, "loss": 0.3523, "step": 990 }, { "epoch": 24.84, "learning_rate": 2.0833333333333336e-05, "loss": 0.3411, "step": 1000 }, { "epoch": 24.99, "eval_accuracy": 0.8563636363636363, "eval_loss": 0.39898931980133057, "eval_runtime": 3.6919, "eval_samples_per_second": 148.976, "eval_steps_per_second": 9.48, "step": 1006 }, { "epoch": 25.09, "learning_rate": 2.0486111111111113e-05, "loss": 0.3233, "step": 1010 }, { "epoch": 25.34, "learning_rate": 2.013888888888889e-05, "loss": 0.288, "step": 1020 }, { "epoch": 25.59, "learning_rate": 1.9791666666666665e-05, "loss": 0.3424, "step": 1030 }, { "epoch": 25.84, "learning_rate": 1.9444444444444445e-05, "loss": 0.3829, "step": 1040 }, { "epoch": 25.99, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.40113887190818787, "eval_runtime": 3.6378, "eval_samples_per_second": 151.189, "eval_steps_per_second": 9.621, "step": 1046 }, { "epoch": 26.09, "learning_rate": 1.9097222222222222e-05, "loss": 0.3457, "step": 1050 }, { "epoch": 26.34, "learning_rate": 1.8750000000000002e-05, "loss": 0.341, "step": 1060 }, { "epoch": 26.58, "learning_rate": 1.8402777777777778e-05, "loss": 0.3439, "step": 1070 }, { "epoch": 26.83, "learning_rate": 1.8055555555555555e-05, "loss": 0.2855, "step": 1080 }, { "epoch": 26.98, "eval_accuracy": 0.8654545454545455, "eval_loss": 0.3858577013015747, "eval_runtime": 3.6854, "eval_samples_per_second": 149.238, "eval_steps_per_second": 9.497, "step": 1086 }, { "epoch": 27.08, "learning_rate": 1.7708333333333335e-05, "loss": 0.3007, "step": 1090 }, { "epoch": 27.33, "learning_rate": 1.736111111111111e-05, "loss": 0.3321, "step": 1100 }, { "epoch": 27.58, "learning_rate": 1.701388888888889e-05, "loss": 0.2806, "step": 1110 }, { "epoch": 27.83, "learning_rate": 1.6666666666666667e-05, "loss": 0.254, "step": 1120 }, { "epoch": 28.0, "eval_accuracy": 0.8672727272727273, "eval_loss": 0.4196436107158661, "eval_runtime": 3.6909, "eval_samples_per_second": 149.017, "eval_steps_per_second": 9.483, "step": 1127 }, { "epoch": 28.07, "learning_rate": 1.6319444444444444e-05, "loss": 0.3221, "step": 1130 }, { "epoch": 28.32, "learning_rate": 1.597222222222222e-05, "loss": 0.2784, "step": 1140 }, { "epoch": 28.57, "learning_rate": 1.5625e-05, "loss": 0.2717, "step": 1150 }, { "epoch": 28.82, "learning_rate": 1.527777777777778e-05, "loss": 0.2937, "step": 1160 }, { "epoch": 28.99, "eval_accuracy": 0.8618181818181818, "eval_loss": 0.43404603004455566, "eval_runtime": 3.7904, "eval_samples_per_second": 145.103, "eval_steps_per_second": 9.234, "step": 1167 }, { "epoch": 29.07, "learning_rate": 1.4930555555555557e-05, "loss": 0.2744, "step": 1170 }, { "epoch": 29.32, "learning_rate": 1.4583333333333335e-05, "loss": 0.247, "step": 1180 }, { "epoch": 29.57, "learning_rate": 1.4236111111111111e-05, "loss": 0.2614, "step": 1190 }, { "epoch": 29.81, "learning_rate": 1.388888888888889e-05, "loss": 0.258, "step": 1200 }, { "epoch": 29.99, "eval_accuracy": 0.850909090909091, "eval_loss": 0.43866536021232605, "eval_runtime": 3.6539, "eval_samples_per_second": 150.525, "eval_steps_per_second": 9.579, "step": 1207 }, { "epoch": 30.06, "learning_rate": 1.3541666666666666e-05, "loss": 0.3287, "step": 1210 }, { "epoch": 30.31, "learning_rate": 1.3194444444444446e-05, "loss": 0.2767, "step": 1220 }, { "epoch": 30.56, "learning_rate": 1.2847222222222222e-05, "loss": 0.2629, "step": 1230 }, { "epoch": 30.81, "learning_rate": 1.25e-05, "loss": 0.2735, "step": 1240 }, { "epoch": 30.98, "eval_accuracy": 0.8654545454545455, "eval_loss": 0.40970146656036377, "eval_runtime": 3.8014, "eval_samples_per_second": 144.683, "eval_steps_per_second": 9.207, "step": 1247 }, { "epoch": 31.06, "learning_rate": 1.2152777777777779e-05, "loss": 0.3213, "step": 1250 }, { "epoch": 31.3, "learning_rate": 1.1805555555555555e-05, "loss": 0.2501, "step": 1260 }, { "epoch": 31.55, "learning_rate": 1.1458333333333333e-05, "loss": 0.2155, "step": 1270 }, { "epoch": 31.8, "learning_rate": 1.1111111111111112e-05, "loss": 0.2674, "step": 1280 }, { "epoch": 32.0, "eval_accuracy": 0.8527272727272728, "eval_loss": 0.41827914118766785, "eval_runtime": 3.6814, "eval_samples_per_second": 149.401, "eval_steps_per_second": 9.507, "step": 1288 }, { "epoch": 32.05, "learning_rate": 1.076388888888889e-05, "loss": 0.2601, "step": 1290 }, { "epoch": 32.3, "learning_rate": 1.0416666666666668e-05, "loss": 0.2215, "step": 1300 }, { "epoch": 32.55, "learning_rate": 1.0069444444444445e-05, "loss": 0.2523, "step": 1310 }, { "epoch": 32.8, "learning_rate": 9.722222222222223e-06, "loss": 0.2547, "step": 1320 }, { "epoch": 32.99, "eval_accuracy": 0.8636363636363636, "eval_loss": 0.421658456325531, "eval_runtime": 3.9024, "eval_samples_per_second": 140.938, "eval_steps_per_second": 8.969, "step": 1328 }, { "epoch": 33.04, "learning_rate": 9.375000000000001e-06, "loss": 0.2755, "step": 1330 }, { "epoch": 33.29, "learning_rate": 9.027777777777777e-06, "loss": 0.2406, "step": 1340 }, { "epoch": 33.54, "learning_rate": 8.680555555555556e-06, "loss": 0.235, "step": 1350 }, { "epoch": 33.79, "learning_rate": 8.333333333333334e-06, "loss": 0.2109, "step": 1360 }, { "epoch": 33.99, "eval_accuracy": 0.8527272727272728, "eval_loss": 0.42401665449142456, "eval_runtime": 3.8429, "eval_samples_per_second": 143.12, "eval_steps_per_second": 9.108, "step": 1368 }, { "epoch": 34.04, "learning_rate": 7.98611111111111e-06, "loss": 0.278, "step": 1370 }, { "epoch": 34.29, "learning_rate": 7.63888888888889e-06, "loss": 0.2164, "step": 1380 }, { "epoch": 34.53, "learning_rate": 7.2916666666666674e-06, "loss": 0.2403, "step": 1390 }, { "epoch": 34.78, "learning_rate": 6.944444444444445e-06, "loss": 0.2248, "step": 1400 }, { "epoch": 34.98, "eval_accuracy": 0.86, "eval_loss": 0.4249715805053711, "eval_runtime": 3.9044, "eval_samples_per_second": 140.865, "eval_steps_per_second": 8.964, "step": 1408 }, { "epoch": 35.03, "learning_rate": 6.597222222222223e-06, "loss": 0.249, "step": 1410 }, { "epoch": 35.28, "learning_rate": 6.25e-06, "loss": 0.2012, "step": 1420 }, { "epoch": 35.53, "learning_rate": 5.902777777777778e-06, "loss": 0.2184, "step": 1430 }, { "epoch": 35.78, "learning_rate": 5.555555555555556e-06, "loss": 0.2397, "step": 1440 }, { "epoch": 36.0, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.44308120012283325, "eval_runtime": 3.7259, "eval_samples_per_second": 147.616, "eval_steps_per_second": 9.394, "step": 1449 }, { "epoch": 36.02, "learning_rate": 5.208333333333334e-06, "loss": 0.2341, "step": 1450 }, { "epoch": 36.27, "learning_rate": 4.861111111111111e-06, "loss": 0.2272, "step": 1460 }, { "epoch": 36.52, "learning_rate": 4.513888888888889e-06, "loss": 0.216, "step": 1470 }, { "epoch": 36.77, "learning_rate": 4.166666666666667e-06, "loss": 0.1823, "step": 1480 }, { "epoch": 36.99, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.4442140758037567, "eval_runtime": 3.7874, "eval_samples_per_second": 145.218, "eval_steps_per_second": 9.241, "step": 1489 }, { "epoch": 37.02, "learning_rate": 3.819444444444445e-06, "loss": 0.1781, "step": 1490 }, { "epoch": 37.27, "learning_rate": 3.4722222222222224e-06, "loss": 0.201, "step": 1500 }, { "epoch": 37.52, "learning_rate": 3.125e-06, "loss": 0.1889, "step": 1510 }, { "epoch": 37.76, "learning_rate": 2.777777777777778e-06, "loss": 0.1834, "step": 1520 }, { "epoch": 37.99, "eval_accuracy": 0.8618181818181818, "eval_loss": 0.4361916780471802, "eval_runtime": 3.6724, "eval_samples_per_second": 149.767, "eval_steps_per_second": 9.531, "step": 1529 }, { "epoch": 38.01, "learning_rate": 2.4305555555555557e-06, "loss": 0.2874, "step": 1530 }, { "epoch": 38.26, "learning_rate": 2.0833333333333334e-06, "loss": 0.2249, "step": 1540 }, { "epoch": 38.51, "learning_rate": 1.7361111111111112e-06, "loss": 0.2189, "step": 1550 }, { "epoch": 38.76, "learning_rate": 1.388888888888889e-06, "loss": 0.1864, "step": 1560 }, { "epoch": 38.98, "eval_accuracy": 0.8545454545454545, "eval_loss": 0.43384620547294617, "eval_runtime": 3.7839, "eval_samples_per_second": 145.353, "eval_steps_per_second": 9.25, "step": 1569 }, { "epoch": 39.01, "learning_rate": 1.0416666666666667e-06, "loss": 0.1708, "step": 1570 }, { "epoch": 39.25, "learning_rate": 6.944444444444445e-07, "loss": 0.2142, "step": 1580 }, { "epoch": 39.5, "learning_rate": 3.4722222222222224e-07, "loss": 0.1828, "step": 1590 }, { "epoch": 39.75, "learning_rate": 0.0, "loss": 0.1779, "step": 1600 }, { "epoch": 39.75, "eval_accuracy": 0.8581818181818182, "eval_loss": 0.4332370162010193, "eval_runtime": 3.7169, "eval_samples_per_second": 147.975, "eval_steps_per_second": 9.417, "step": 1600 }, { "epoch": 39.75, "step": 1600, "total_flos": 7.892728807229393e+18, "train_loss": 0.45905578792095186, "train_runtime": 1870.7092, "train_samples_per_second": 54.803, "train_steps_per_second": 0.855 } ], "logging_steps": 10, "max_steps": 1600, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 7.892728807229393e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }