|
{ |
|
"best_metric": 0.8227848101265823, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-55-fold5/checkpoint-150", |
|
"epoch": 85.71428571428571, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"eval_accuracy": 0.5189873417721519, |
|
"eval_loss": 0.7318889498710632, |
|
"eval_runtime": 1.1213, |
|
"eval_samples_per_second": 70.455, |
|
"eval_steps_per_second": 2.676, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.569620253164557, |
|
"eval_loss": 0.675592303276062, |
|
"eval_runtime": 1.1562, |
|
"eval_samples_per_second": 68.326, |
|
"eval_steps_per_second": 2.595, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 1.9025880098342896, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.6544, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_accuracy": 0.6455696202531646, |
|
"eval_loss": 0.619909405708313, |
|
"eval_runtime": 1.2043, |
|
"eval_samples_per_second": 65.598, |
|
"eval_steps_per_second": 2.491, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.620253164556962, |
|
"eval_loss": 0.5987372994422913, |
|
"eval_runtime": 1.1134, |
|
"eval_samples_per_second": 70.952, |
|
"eval_steps_per_second": 2.694, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"eval_accuracy": 0.6708860759493671, |
|
"eval_loss": 0.5676239728927612, |
|
"eval_runtime": 1.1193, |
|
"eval_samples_per_second": 70.581, |
|
"eval_steps_per_second": 2.68, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 2.5250775814056396, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6173, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5822784810126582, |
|
"eval_loss": 0.6542880535125732, |
|
"eval_runtime": 1.1319, |
|
"eval_samples_per_second": 69.791, |
|
"eval_steps_per_second": 2.65, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.5309708118438721, |
|
"eval_runtime": 1.1635, |
|
"eval_samples_per_second": 67.9, |
|
"eval_steps_per_second": 2.578, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6075949367088608, |
|
"eval_loss": 0.6724244356155396, |
|
"eval_runtime": 1.1519, |
|
"eval_samples_per_second": 68.58, |
|
"eval_steps_per_second": 2.604, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 3.0099055767059326, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5245, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.857142857142858, |
|
"eval_accuracy": 0.6582278481012658, |
|
"eval_loss": 0.6443866491317749, |
|
"eval_runtime": 1.1308, |
|
"eval_samples_per_second": 69.861, |
|
"eval_steps_per_second": 2.653, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.5026643872261047, |
|
"eval_runtime": 1.1348, |
|
"eval_samples_per_second": 69.614, |
|
"eval_steps_per_second": 2.644, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_accuracy": 0.6582278481012658, |
|
"eval_loss": 0.6327927708625793, |
|
"eval_runtime": 1.1376, |
|
"eval_samples_per_second": 69.443, |
|
"eval_steps_per_second": 2.637, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 3.429919719696045, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.4554, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.48831707239151, |
|
"eval_runtime": 1.1619, |
|
"eval_samples_per_second": 67.99, |
|
"eval_steps_per_second": 2.582, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"eval_accuracy": 0.6582278481012658, |
|
"eval_loss": 0.6736446619033813, |
|
"eval_runtime": 1.1393, |
|
"eval_samples_per_second": 69.338, |
|
"eval_steps_per_second": 2.633, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.4583931863307953, |
|
"eval_runtime": 1.1376, |
|
"eval_samples_per_second": 69.444, |
|
"eval_steps_per_second": 2.637, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 3.318418264389038, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4575, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_accuracy": 0.6455696202531646, |
|
"eval_loss": 0.8099328875541687, |
|
"eval_runtime": 1.1512, |
|
"eval_samples_per_second": 68.622, |
|
"eval_steps_per_second": 2.606, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.47672876715660095, |
|
"eval_runtime": 1.1261, |
|
"eval_samples_per_second": 70.154, |
|
"eval_steps_per_second": 2.664, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.857142857142858, |
|
"eval_accuracy": 0.6835443037974683, |
|
"eval_loss": 0.6058729290962219, |
|
"eval_runtime": 1.1442, |
|
"eval_samples_per_second": 69.041, |
|
"eval_steps_per_second": 2.622, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 4.997932434082031, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3798, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.4863268733024597, |
|
"eval_runtime": 1.1309, |
|
"eval_samples_per_second": 69.854, |
|
"eval_steps_per_second": 2.653, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.5635764598846436, |
|
"eval_runtime": 1.1455, |
|
"eval_samples_per_second": 68.967, |
|
"eval_steps_per_second": 2.619, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.959798574447632, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.3419, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.4677181839942932, |
|
"eval_runtime": 1.1321, |
|
"eval_samples_per_second": 69.782, |
|
"eval_steps_per_second": 2.65, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.857142857142858, |
|
"eval_accuracy": 0.7088607594936709, |
|
"eval_loss": 0.4883308410644531, |
|
"eval_runtime": 1.1375, |
|
"eval_samples_per_second": 69.453, |
|
"eval_steps_per_second": 2.637, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7215189873417721, |
|
"eval_loss": 0.5549487471580505, |
|
"eval_runtime": 1.1347, |
|
"eval_samples_per_second": 69.623, |
|
"eval_steps_per_second": 2.644, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 6.16960334777832, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3079, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.4324057400226593, |
|
"eval_runtime": 1.1312, |
|
"eval_samples_per_second": 69.84, |
|
"eval_steps_per_second": 2.652, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6708860759493671, |
|
"eval_loss": 0.6183649897575378, |
|
"eval_runtime": 1.1713, |
|
"eval_samples_per_second": 67.446, |
|
"eval_steps_per_second": 2.561, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 24.857142857142858, |
|
"eval_accuracy": 0.7088607594936709, |
|
"eval_loss": 0.6148533821105957, |
|
"eval_runtime": 1.147, |
|
"eval_samples_per_second": 68.878, |
|
"eval_steps_per_second": 2.616, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 3.098267078399658, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2616, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.44876629114151, |
|
"eval_runtime": 1.1303, |
|
"eval_samples_per_second": 69.895, |
|
"eval_steps_per_second": 2.654, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.43682238459587097, |
|
"eval_runtime": 1.1557, |
|
"eval_samples_per_second": 68.354, |
|
"eval_steps_per_second": 2.596, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.45659732818603516, |
|
"eval_runtime": 1.1429, |
|
"eval_samples_per_second": 69.124, |
|
"eval_steps_per_second": 2.625, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 4.5599517822265625, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 28.857142857142858, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.4657270312309265, |
|
"eval_runtime": 1.1397, |
|
"eval_samples_per_second": 69.319, |
|
"eval_steps_per_second": 2.632, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.4514474868774414, |
|
"eval_runtime": 1.1483, |
|
"eval_samples_per_second": 68.795, |
|
"eval_steps_per_second": 2.612, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 30.857142857142858, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.5082857608795166, |
|
"eval_runtime": 1.1218, |
|
"eval_samples_per_second": 70.425, |
|
"eval_steps_per_second": 2.674, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 2.26061749458313, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.2258, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.5260893106460571, |
|
"eval_runtime": 1.1489, |
|
"eval_samples_per_second": 68.759, |
|
"eval_steps_per_second": 2.611, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.556735634803772, |
|
"eval_runtime": 1.1778, |
|
"eval_samples_per_second": 67.076, |
|
"eval_steps_per_second": 2.547, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.810126582278481, |
|
"eval_loss": 0.5566364526748657, |
|
"eval_runtime": 1.1382, |
|
"eval_samples_per_second": 69.406, |
|
"eval_steps_per_second": 2.636, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 2.7529988288879395, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1972, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 34.857142857142854, |
|
"eval_accuracy": 0.810126582278481, |
|
"eval_loss": 0.5494687557220459, |
|
"eval_runtime": 1.1486, |
|
"eval_samples_per_second": 68.78, |
|
"eval_steps_per_second": 2.612, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.49917009472846985, |
|
"eval_runtime": 1.1413, |
|
"eval_samples_per_second": 69.221, |
|
"eval_steps_per_second": 2.629, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 36.857142857142854, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.566094696521759, |
|
"eval_runtime": 1.1393, |
|
"eval_samples_per_second": 69.341, |
|
"eval_steps_per_second": 2.633, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 3.6283090114593506, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.1709, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.7325516939163208, |
|
"eval_runtime": 1.1565, |
|
"eval_samples_per_second": 68.311, |
|
"eval_steps_per_second": 2.594, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 38.857142857142854, |
|
"eval_accuracy": 0.810126582278481, |
|
"eval_loss": 0.5634633898735046, |
|
"eval_runtime": 1.1297, |
|
"eval_samples_per_second": 69.93, |
|
"eval_steps_per_second": 2.656, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 2.0195701122283936, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.1537, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.8130123019218445, |
|
"eval_runtime": 1.1502, |
|
"eval_samples_per_second": 68.685, |
|
"eval_steps_per_second": 2.608, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 40.857142857142854, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.6983793377876282, |
|
"eval_runtime": 1.1474, |
|
"eval_samples_per_second": 68.849, |
|
"eval_steps_per_second": 2.615, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.7777484655380249, |
|
"eval_runtime": 1.1447, |
|
"eval_samples_per_second": 69.011, |
|
"eval_steps_per_second": 2.621, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 3.92864727973938, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1687, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"eval_accuracy": 0.8227848101265823, |
|
"eval_loss": 0.645160436630249, |
|
"eval_runtime": 1.1266, |
|
"eval_samples_per_second": 70.123, |
|
"eval_steps_per_second": 2.663, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7215189873417721, |
|
"eval_loss": 0.8527082800865173, |
|
"eval_runtime": 1.162, |
|
"eval_samples_per_second": 67.988, |
|
"eval_steps_per_second": 2.582, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 44.857142857142854, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.6483196020126343, |
|
"eval_runtime": 1.1395, |
|
"eval_samples_per_second": 69.329, |
|
"eval_steps_per_second": 2.633, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 2.570295810699463, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1588, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.8185186982154846, |
|
"eval_runtime": 1.1438, |
|
"eval_samples_per_second": 69.068, |
|
"eval_steps_per_second": 2.623, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 46.857142857142854, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.6821473836898804, |
|
"eval_runtime": 1.174, |
|
"eval_samples_per_second": 67.29, |
|
"eval_steps_per_second": 2.555, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.7593724131584167, |
|
"eval_runtime": 1.1514, |
|
"eval_samples_per_second": 68.611, |
|
"eval_steps_per_second": 2.605, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 48.57142857142857, |
|
"grad_norm": 2.7910444736480713, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.144, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 48.857142857142854, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 1.0232322216033936, |
|
"eval_runtime": 1.1447, |
|
"eval_samples_per_second": 69.012, |
|
"eval_steps_per_second": 2.621, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.6178007125854492, |
|
"eval_runtime": 1.1535, |
|
"eval_samples_per_second": 68.485, |
|
"eval_steps_per_second": 2.601, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 50.857142857142854, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.6243430376052856, |
|
"eval_runtime": 1.149, |
|
"eval_samples_per_second": 68.756, |
|
"eval_steps_per_second": 2.611, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 51.42857142857143, |
|
"grad_norm": 2.441843271255493, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1449, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.8159094452857971, |
|
"eval_runtime": 1.1388, |
|
"eval_samples_per_second": 69.372, |
|
"eval_steps_per_second": 2.634, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 52.857142857142854, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.6664446592330933, |
|
"eval_runtime": 1.1472, |
|
"eval_samples_per_second": 68.865, |
|
"eval_steps_per_second": 2.615, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.7069984078407288, |
|
"eval_runtime": 1.1371, |
|
"eval_samples_per_second": 69.475, |
|
"eval_steps_per_second": 2.638, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 54.285714285714285, |
|
"grad_norm": 1.9345264434814453, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.144, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 54.857142857142854, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.7360506057739258, |
|
"eval_runtime": 1.1264, |
|
"eval_samples_per_second": 70.132, |
|
"eval_steps_per_second": 2.663, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.6655969023704529, |
|
"eval_runtime": 1.1372, |
|
"eval_samples_per_second": 69.471, |
|
"eval_steps_per_second": 2.638, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 56.857142857142854, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.7487228512763977, |
|
"eval_runtime": 1.158, |
|
"eval_samples_per_second": 68.222, |
|
"eval_steps_per_second": 2.591, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 57.142857142857146, |
|
"grad_norm": 2.869379758834839, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1199, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.7992926239967346, |
|
"eval_runtime": 1.1371, |
|
"eval_samples_per_second": 69.472, |
|
"eval_steps_per_second": 2.638, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 58.857142857142854, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.7426438927650452, |
|
"eval_runtime": 1.1515, |
|
"eval_samples_per_second": 68.608, |
|
"eval_steps_per_second": 2.605, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 3.277022123336792, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1258, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.7530637979507446, |
|
"eval_runtime": 1.1459, |
|
"eval_samples_per_second": 68.943, |
|
"eval_steps_per_second": 2.618, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 60.857142857142854, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.7388187646865845, |
|
"eval_runtime": 1.1538, |
|
"eval_samples_per_second": 68.468, |
|
"eval_steps_per_second": 2.6, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.7394750714302063, |
|
"eval_runtime": 1.1338, |
|
"eval_samples_per_second": 69.677, |
|
"eval_steps_per_second": 2.646, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 62.857142857142854, |
|
"grad_norm": 2.4016706943511963, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1392, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 62.857142857142854, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.8237771391868591, |
|
"eval_runtime": 1.1382, |
|
"eval_samples_per_second": 69.41, |
|
"eval_steps_per_second": 2.636, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7215189873417721, |
|
"eval_loss": 0.9302475452423096, |
|
"eval_runtime": 1.1601, |
|
"eval_samples_per_second": 68.098, |
|
"eval_steps_per_second": 2.586, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 64.85714285714286, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.7539066076278687, |
|
"eval_runtime": 1.142, |
|
"eval_samples_per_second": 69.178, |
|
"eval_steps_per_second": 2.627, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 65.71428571428571, |
|
"grad_norm": 2.6353225708007812, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.1303, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.810126582278481, |
|
"eval_loss": 0.6738837361335754, |
|
"eval_runtime": 1.127, |
|
"eval_samples_per_second": 70.095, |
|
"eval_steps_per_second": 2.662, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 66.85714285714286, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.662686824798584, |
|
"eval_runtime": 1.1663, |
|
"eval_samples_per_second": 67.734, |
|
"eval_steps_per_second": 2.572, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.640326201915741, |
|
"eval_runtime": 1.133, |
|
"eval_samples_per_second": 69.724, |
|
"eval_steps_per_second": 2.648, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 68.57142857142857, |
|
"grad_norm": 3.1281940937042236, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1423, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 68.85714285714286, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.6378774642944336, |
|
"eval_runtime": 1.1326, |
|
"eval_samples_per_second": 69.753, |
|
"eval_steps_per_second": 2.649, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.7658242583274841, |
|
"eval_runtime": 1.1438, |
|
"eval_samples_per_second": 69.068, |
|
"eval_steps_per_second": 2.623, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 70.85714285714286, |
|
"eval_accuracy": 0.7341772151898734, |
|
"eval_loss": 0.9195311069488525, |
|
"eval_runtime": 1.1256, |
|
"eval_samples_per_second": 70.185, |
|
"eval_steps_per_second": 2.665, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 2.4990944862365723, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1019, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.7286852598190308, |
|
"eval_runtime": 1.1291, |
|
"eval_samples_per_second": 69.969, |
|
"eval_steps_per_second": 2.657, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 72.85714285714286, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.6548206210136414, |
|
"eval_runtime": 1.127, |
|
"eval_samples_per_second": 70.098, |
|
"eval_steps_per_second": 2.662, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.653405487537384, |
|
"eval_runtime": 1.1305, |
|
"eval_samples_per_second": 69.88, |
|
"eval_steps_per_second": 2.654, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 74.28571428571429, |
|
"grad_norm": 2.0475404262542725, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1286, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 74.85714285714286, |
|
"eval_accuracy": 0.7848101265822784, |
|
"eval_loss": 0.7330575585365295, |
|
"eval_runtime": 1.1366, |
|
"eval_samples_per_second": 69.508, |
|
"eval_steps_per_second": 2.64, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.7845055460929871, |
|
"eval_runtime": 1.1356, |
|
"eval_samples_per_second": 69.566, |
|
"eval_steps_per_second": 2.642, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 76.85714285714286, |
|
"eval_accuracy": 0.7974683544303798, |
|
"eval_loss": 0.7187694311141968, |
|
"eval_runtime": 1.1547, |
|
"eval_samples_per_second": 68.416, |
|
"eval_steps_per_second": 2.598, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 77.14285714285714, |
|
"grad_norm": 3.98937726020813, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1054, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.6595200896263123, |
|
"eval_runtime": 1.1479, |
|
"eval_samples_per_second": 68.823, |
|
"eval_steps_per_second": 2.614, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 78.85714285714286, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.662256121635437, |
|
"eval_runtime": 1.1304, |
|
"eval_samples_per_second": 69.885, |
|
"eval_steps_per_second": 2.654, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 1.8091683387756348, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1053, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.7336876392364502, |
|
"eval_runtime": 1.1418, |
|
"eval_samples_per_second": 69.186, |
|
"eval_steps_per_second": 2.627, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 80.85714285714286, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.8085371851921082, |
|
"eval_runtime": 1.1388, |
|
"eval_samples_per_second": 69.372, |
|
"eval_steps_per_second": 2.634, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.8200588226318359, |
|
"eval_runtime": 1.1478, |
|
"eval_samples_per_second": 68.825, |
|
"eval_steps_per_second": 2.614, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 82.85714285714286, |
|
"grad_norm": 3.0363099575042725, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.1086, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 82.85714285714286, |
|
"eval_accuracy": 0.7468354430379747, |
|
"eval_loss": 0.7947056293487549, |
|
"eval_runtime": 1.1218, |
|
"eval_samples_per_second": 70.419, |
|
"eval_steps_per_second": 2.674, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7721518987341772, |
|
"eval_loss": 0.7668984532356262, |
|
"eval_runtime": 1.1428, |
|
"eval_samples_per_second": 69.129, |
|
"eval_steps_per_second": 2.625, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 84.85714285714286, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.7581708431243896, |
|
"eval_runtime": 1.1285, |
|
"eval_samples_per_second": 70.006, |
|
"eval_steps_per_second": 2.658, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 85.71428571428571, |
|
"grad_norm": 2.2897379398345947, |
|
"learning_rate": 0.0, |
|
"loss": 0.1186, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 85.71428571428571, |
|
"eval_accuracy": 0.759493670886076, |
|
"eval_loss": 0.7541030645370483, |
|
"eval_runtime": 1.1594, |
|
"eval_samples_per_second": 68.138, |
|
"eval_steps_per_second": 2.588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 85.71428571428571, |
|
"step": 300, |
|
"total_flos": 2.9362240500074496e+18, |
|
"train_loss": 0.23499208807945252, |
|
"train_runtime": 1665.1689, |
|
"train_samples_per_second": 26.544, |
|
"train_steps_per_second": 0.18 |
|
}, |
|
{ |
|
"epoch": 85.71428571428571, |
|
"eval_accuracy": 0.8227848101265823, |
|
"eval_loss": 0.645160436630249, |
|
"eval_runtime": 1.1931, |
|
"eval_samples_per_second": 66.215, |
|
"eval_steps_per_second": 2.514, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.9362240500074496e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|