|
{
  "best_metric": 0.40238592824799724,
  "best_model_checkpoint": "/groups/claytonm/enoriega/kw_pubmed/kw_pubmed_5000_0.000006/checkpoint-435",
  "epoch": 3.795931045193353,
  "global_step": 585,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 5.968831168831169e-06,
      "loss": 4.3584,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.937662337662337e-06,
      "loss": 4.0315,
      "step": 10
    },
    {
      "epoch": 0.1,
      "learning_rate": 5.8987012987012994e-06,
      "loss": 3.9357,
      "step": 15
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.37917972831765934,
      "eval_loss": 3.8932790756225586,
      "eval_runtime": 16.4606,
      "eval_samples_per_second": 607.512,
      "eval_steps_per_second": 19.015,
      "step": 15
    },
    {
      "epoch": 0.13,
      "learning_rate": 5.85974025974026e-06,
      "loss": 3.8549,
      "step": 20
    },
    {
      "epoch": 0.16,
      "learning_rate": 5.820779220779221e-06,
      "loss": 3.7904,
      "step": 25
    },
    {
      "epoch": 0.19,
      "learning_rate": 5.781818181818182e-06,
      "loss": 3.7574,
      "step": 30
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.38153082549634276,
      "eval_loss": 3.8384764194488525,
      "eval_runtime": 16.5354,
      "eval_samples_per_second": 604.762,
      "eval_steps_per_second": 18.929,
      "step": 30
    },
    {
      "epoch": 0.23,
      "learning_rate": 5.742857142857143e-06,
      "loss": 3.7476,
      "step": 35
    },
    {
      "epoch": 0.26,
      "learning_rate": 5.703896103896104e-06,
      "loss": 3.6722,
      "step": 40
    },
    {
      "epoch": 0.29,
      "learning_rate": 5.664935064935065e-06,
      "loss": 3.6652,
      "step": 45
    },
    {
      "epoch": 0.29,
      "eval_accuracy": 0.38283699059561127,
      "eval_loss": 3.812371015548706,
      "eval_runtime": 16.5574,
      "eval_samples_per_second": 603.961,
      "eval_steps_per_second": 18.904,
      "step": 45
    },
    {
      "epoch": 0.32,
      "learning_rate": 5.625974025974026e-06,
      "loss": 3.6491,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 5.587012987012987e-06,
      "loss": 3.6096,
      "step": 55
    },
    {
      "epoch": 0.39,
      "learning_rate": 5.548051948051948e-06,
      "loss": 3.5893,
      "step": 60
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.3845785440613027,
      "eval_loss": 3.786245584487915,
      "eval_runtime": 16.5595,
      "eval_samples_per_second": 603.881,
      "eval_steps_per_second": 18.901,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 5.50909090909091e-06,
      "loss": 3.5818,
      "step": 65
    },
    {
      "epoch": 0.45,
      "learning_rate": 5.47012987012987e-06,
      "loss": 3.548,
      "step": 70
    },
    {
      "epoch": 0.49,
      "learning_rate": 5.431168831168831e-06,
      "loss": 3.5135,
      "step": 75
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.389063044235458,
      "eval_loss": 3.7630181312561035,
      "eval_runtime": 17.3582,
      "eval_samples_per_second": 576.098,
      "eval_steps_per_second": 18.032,
      "step": 75
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.3922077922077925e-06,
      "loss": 3.51,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.353246753246754e-06,
      "loss": 3.502,
      "step": 85
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.314285714285714e-06,
      "loss": 3.4776,
      "step": 90
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.38788749564611635,
      "eval_loss": 3.7638540267944336,
      "eval_runtime": 17.4685,
      "eval_samples_per_second": 572.46,
      "eval_steps_per_second": 17.918,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 5.275324675324675e-06,
      "loss": 3.455,
      "step": 95
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.2363636363636365e-06,
      "loss": 3.4606,
      "step": 100
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.197402597402598e-06,
      "loss": 3.4083,
      "step": 105
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.3912835249042146,
      "eval_loss": 3.7579634189605713,
      "eval_runtime": 17.4161,
      "eval_samples_per_second": 574.181,
      "eval_steps_per_second": 17.972,
      "step": 105
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.158441558441558e-06,
      "loss": 3.3844,
      "step": 110
    },
    {
      "epoch": 0.74,
      "learning_rate": 5.119480519480519e-06,
      "loss": 3.4324,
      "step": 115
    },
    {
      "epoch": 0.78,
      "learning_rate": 5.0805194805194805e-06,
      "loss": 3.3791,
      "step": 120
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.38784395680947403,
      "eval_loss": 3.7581143379211426,
      "eval_runtime": 17.4647,
      "eval_samples_per_second": 572.583,
      "eval_steps_per_second": 17.922,
      "step": 120
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.041558441558442e-06,
      "loss": 3.3814,
      "step": 125
    },
    {
      "epoch": 0.84,
      "learning_rate": 5.002597402597402e-06,
      "loss": 3.354,
      "step": 130
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.963636363636364e-06,
      "loss": 3.3557,
      "step": 135
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.39150121908742597,
      "eval_loss": 3.74817156791687,
      "eval_runtime": 17.4451,
      "eval_samples_per_second": 573.227,
      "eval_steps_per_second": 17.942,
      "step": 135
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.924675324675325e-06,
      "loss": 3.3157,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.885714285714286e-06,
      "loss": 3.3465,
      "step": 145
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.846753246753247e-06,
      "loss": 3.3077,
      "step": 150
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.39080459770114945,
      "eval_loss": 3.749429702758789,
      "eval_runtime": 17.4638,
      "eval_samples_per_second": 572.613,
      "eval_steps_per_second": 17.923,
      "step": 150
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.807792207792208e-06,
      "loss": 3.6416,
      "step": 155
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.768831168831169e-06,
      "loss": 3.2947,
      "step": 160
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.72987012987013e-06,
      "loss": 3.2584,
      "step": 165
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.39372169975618254,
      "eval_loss": 3.7410030364990234,
      "eval_runtime": 17.4385,
      "eval_samples_per_second": 573.445,
      "eval_steps_per_second": 17.949,
      "step": 165
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.690909090909092e-06,
      "loss": 3.2643,
      "step": 170
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.651948051948052e-06,
      "loss": 3.2647,
      "step": 175
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.612987012987013e-06,
      "loss": 3.2459,
      "step": 180
    },
    {
      "epoch": 1.17,
      "eval_accuracy": 0.39167537443399514,
      "eval_loss": 3.7585999965667725,
      "eval_runtime": 17.4515,
      "eval_samples_per_second": 573.017,
      "eval_steps_per_second": 17.935,
      "step": 180
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.574025974025974e-06,
      "loss": 3.2303,
      "step": 185
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.535064935064936e-06,
      "loss": 3.2218,
      "step": 190
    },
    {
      "epoch": 1.27,
      "learning_rate": 4.496103896103896e-06,
      "loss": 3.214,
      "step": 195
    },
    {
      "epoch": 1.27,
      "eval_accuracy": 0.3966823406478579,
      "eval_loss": 3.7317585945129395,
      "eval_runtime": 17.4059,
      "eval_samples_per_second": 574.518,
      "eval_steps_per_second": 17.982,
      "step": 195
    },
    {
      "epoch": 1.3,
      "learning_rate": 4.457142857142857e-06,
      "loss": 3.1917,
      "step": 200
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.418181818181818e-06,
      "loss": 3.185,
      "step": 205
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.37922077922078e-06,
      "loss": 3.2013,
      "step": 210
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.39707419017763845,
      "eval_loss": 3.7158472537994385,
      "eval_runtime": 17.4332,
      "eval_samples_per_second": 573.618,
      "eval_steps_per_second": 17.954,
      "step": 210
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.34025974025974e-06,
      "loss": 3.1968,
      "step": 215
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.301298701298701e-06,
      "loss": 3.1735,
      "step": 220
    },
    {
      "epoch": 1.46,
      "learning_rate": 4.262337662337662e-06,
      "loss": 3.1887,
      "step": 225
    },
    {
      "epoch": 1.46,
      "eval_accuracy": 0.3923284569836294,
      "eval_loss": 3.7491486072540283,
      "eval_runtime": 17.4492,
      "eval_samples_per_second": 573.091,
      "eval_steps_per_second": 17.938,
      "step": 225
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.2233766233766236e-06,
      "loss": 3.1663,
      "step": 230
    },
    {
      "epoch": 1.52,
      "learning_rate": 4.184415584415584e-06,
      "loss": 3.1689,
      "step": 235
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.145454545454546e-06,
      "loss": 3.1359,
      "step": 240
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.39655172413793105,
      "eval_loss": 3.7439537048339844,
      "eval_runtime": 17.4433,
      "eval_samples_per_second": 573.287,
      "eval_steps_per_second": 17.944,
      "step": 240
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.106493506493506e-06,
      "loss": 3.162,
      "step": 245
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.0675324675324675e-06,
      "loss": 3.1473,
      "step": 250
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.028571428571429e-06,
      "loss": 3.1424,
      "step": 255
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.3979014280738419,
      "eval_loss": 3.724943161010742,
      "eval_runtime": 17.4625,
      "eval_samples_per_second": 572.655,
      "eval_steps_per_second": 17.924,
      "step": 255
    },
    {
      "epoch": 1.69,
      "learning_rate": 3.98961038961039e-06,
      "loss": 3.1377,
      "step": 260
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.950649350649351e-06,
      "loss": 3.1185,
      "step": 265
    },
    {
      "epoch": 1.75,
      "learning_rate": 3.9116883116883115e-06,
      "loss": 3.124,
      "step": 270
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.397466039707419,
      "eval_loss": 3.734914779663086,
      "eval_runtime": 17.4427,
      "eval_samples_per_second": 573.305,
      "eval_steps_per_second": 17.944,
      "step": 270
    },
    {
      "epoch": 1.78,
      "learning_rate": 3.8727272727272735e-06,
      "loss": 3.0861,
      "step": 275
    },
    {
      "epoch": 1.82,
      "learning_rate": 3.833766233766234e-06,
      "loss": 3.08,
      "step": 280
    },
    {
      "epoch": 1.85,
      "learning_rate": 3.794805194805195e-06,
      "loss": 3.0793,
      "step": 285
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.3981191222570533,
      "eval_loss": 3.7173280715942383,
      "eval_runtime": 17.4324,
      "eval_samples_per_second": 573.645,
      "eval_steps_per_second": 17.955,
      "step": 285
    },
    {
      "epoch": 1.88,
      "learning_rate": 3.755844155844156e-06,
      "loss": 3.0642,
      "step": 290
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.716883116883117e-06,
      "loss": 3.0848,
      "step": 295
    },
    {
      "epoch": 1.94,
      "learning_rate": 3.677922077922078e-06,
      "loss": 3.0527,
      "step": 300
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.39973005921281785,
      "eval_loss": 3.722606658935547,
      "eval_runtime": 17.44,
      "eval_samples_per_second": 573.394,
      "eval_steps_per_second": 17.947,
      "step": 300
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.638961038961039e-06,
      "loss": 3.062,
      "step": 305
    },
    {
      "epoch": 2.01,
      "learning_rate": 3.6e-06,
      "loss": 3.3985,
      "step": 310
    },
    {
      "epoch": 2.05,
      "learning_rate": 3.5610389610389615e-06,
      "loss": 3.049,
      "step": 315
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.3986415882967607,
      "eval_loss": 3.7341573238372803,
      "eval_runtime": 17.4676,
      "eval_samples_per_second": 572.489,
      "eval_steps_per_second": 17.919,
      "step": 315
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.522077922077922e-06,
      "loss": 3.0231,
      "step": 320
    },
    {
      "epoch": 2.11,
      "learning_rate": 3.4831168831168834e-06,
      "loss": 3.0292,
      "step": 325
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.4441558441558442e-06,
      "loss": 3.0422,
      "step": 330
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.3981191222570533,
      "eval_loss": 3.747906446456909,
      "eval_runtime": 17.4308,
      "eval_samples_per_second": 573.698,
      "eval_steps_per_second": 17.957,
      "step": 330
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.4051948051948054e-06,
      "loss": 3.0296,
      "step": 335
    },
    {
      "epoch": 2.21,
      "learning_rate": 3.366233766233766e-06,
      "loss": 3.042,
      "step": 340
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.3272727272727274e-06,
      "loss": 3.0078,
      "step": 345
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.39502786485545105,
      "eval_loss": 3.7774932384490967,
      "eval_runtime": 17.3904,
      "eval_samples_per_second": 575.029,
      "eval_steps_per_second": 17.998,
      "step": 345
    },
    {
      "epoch": 2.27,
      "learning_rate": 3.288311688311688e-06,
      "loss": 3.0348,
      "step": 350
    },
    {
      "epoch": 2.3,
      "learning_rate": 3.2493506493506494e-06,
      "loss": 3.0249,
      "step": 355
    },
    {
      "epoch": 2.34,
      "learning_rate": 3.21038961038961e-06,
      "loss": 2.9674,
      "step": 360
    },
    {
      "epoch": 2.34,
      "eval_accuracy": 0.3997735980494601,
      "eval_loss": 3.741637945175171,
      "eval_runtime": 17.4523,
      "eval_samples_per_second": 572.989,
      "eval_steps_per_second": 17.935,
      "step": 360
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.1714285714285714e-06,
      "loss": 3.0017,
      "step": 365
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.132467532467532e-06,
      "loss": 2.9889,
      "step": 370
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.0935064935064938e-06,
      "loss": 2.9812,
      "step": 375
    },
    {
      "epoch": 2.43,
      "eval_accuracy": 0.3970306513409962,
      "eval_loss": 3.760990619659424,
      "eval_runtime": 17.5073,
      "eval_samples_per_second": 571.19,
      "eval_steps_per_second": 17.878,
      "step": 375
    },
    {
      "epoch": 2.47,
      "learning_rate": 3.054545454545454e-06,
      "loss": 2.9882,
      "step": 380
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.0155844155844158e-06,
      "loss": 2.9912,
      "step": 385
    },
    {
      "epoch": 2.53,
      "learning_rate": 2.9766233766233765e-06,
      "loss": 2.9518,
      "step": 390
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.3969435736677116,
      "eval_loss": 3.7642152309417725,
      "eval_runtime": 17.4159,
      "eval_samples_per_second": 574.188,
      "eval_steps_per_second": 17.972,
      "step": 390
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.9376623376623377e-06,
      "loss": 2.9598,
      "step": 395
    },
    {
      "epoch": 2.6,
      "learning_rate": 2.898701298701299e-06,
      "loss": 2.9762,
      "step": 400
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.8597402597402597e-06,
      "loss": 2.9826,
      "step": 405
    },
    {
      "epoch": 2.63,
      "eval_accuracy": 0.39946882619296414,
      "eval_loss": 3.749140501022339,
      "eval_runtime": 17.4742,
      "eval_samples_per_second": 572.273,
      "eval_steps_per_second": 17.912,
      "step": 405
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.820779220779221e-06,
      "loss": 2.9705,
      "step": 410
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.7818181818181817e-06,
      "loss": 2.9742,
      "step": 415
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.742857142857143e-06,
      "loss": 2.9689,
      "step": 420
    },
    {
      "epoch": 2.72,
      "eval_accuracy": 0.39568094740508536,
      "eval_loss": 3.760981321334839,
      "eval_runtime": 17.4343,
      "eval_samples_per_second": 573.581,
      "eval_steps_per_second": 17.953,
      "step": 420
    },
    {
      "epoch": 2.76,
      "learning_rate": 2.7038961038961037e-06,
      "loss": 2.9619,
      "step": 425
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.664935064935065e-06,
      "loss": 2.9498,
      "step": 430
    },
    {
      "epoch": 2.82,
      "learning_rate": 2.625974025974026e-06,
      "loss": 2.9517,
      "step": 435
    },
    {
      "epoch": 2.82,
      "eval_accuracy": 0.40238592824799724,
      "eval_loss": 3.7294681072235107,
      "eval_runtime": 17.4607,
      "eval_samples_per_second": 572.714,
      "eval_steps_per_second": 17.926,
      "step": 435
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.587012987012987e-06,
      "loss": 2.9564,
      "step": 440
    },
    {
      "epoch": 2.89,
      "learning_rate": 2.548051948051948e-06,
      "loss": 2.9505,
      "step": 445
    },
    {
      "epoch": 2.92,
      "learning_rate": 2.509090909090909e-06,
      "loss": 2.9421,
      "step": 450
    },
    {
      "epoch": 2.92,
      "eval_accuracy": 0.39855451062347613,
      "eval_loss": 3.736074209213257,
      "eval_runtime": 17.4502,
      "eval_samples_per_second": 573.061,
      "eval_steps_per_second": 17.937,
      "step": 450
    },
    {
      "epoch": 2.95,
      "learning_rate": 2.4701298701298705e-06,
      "loss": 2.9477,
      "step": 455
    },
    {
      "epoch": 2.98,
      "learning_rate": 2.4311688311688313e-06,
      "loss": 2.9389,
      "step": 460
    },
    {
      "epoch": 3.02,
      "learning_rate": 2.3922077922077925e-06,
      "loss": 3.2627,
      "step": 465
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.4000348310693138,
      "eval_loss": 3.746295690536499,
      "eval_runtime": 17.4136,
      "eval_samples_per_second": 574.264,
      "eval_steps_per_second": 17.974,
      "step": 465
    },
    {
      "epoch": 3.05,
      "learning_rate": 2.3532467532467533e-06,
      "loss": 2.9154,
      "step": 470
    },
    {
      "epoch": 3.08,
      "learning_rate": 2.3142857142857145e-06,
      "loss": 2.931,
      "step": 475
    },
    {
      "epoch": 3.12,
      "learning_rate": 2.2753246753246757e-06,
      "loss": 2.9189,
      "step": 480
    },
    {
      "epoch": 3.12,
      "eval_accuracy": 0.3969435736677116,
      "eval_loss": 3.7732067108154297,
      "eval_runtime": 17.4617,
      "eval_samples_per_second": 572.681,
      "eval_steps_per_second": 17.925,
      "step": 480
    },
    {
      "epoch": 3.15,
      "learning_rate": 2.2363636363636364e-06,
      "loss": 2.9295,
      "step": 485
    },
    {
      "epoch": 3.18,
      "learning_rate": 2.1974025974025976e-06,
      "loss": 2.9133,
      "step": 490
    },
    {
      "epoch": 3.21,
      "learning_rate": 2.1584415584415584e-06,
      "loss": 2.9063,
      "step": 495
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.39990421455938696,
      "eval_loss": 3.763644218444824,
      "eval_runtime": 17.4503,
      "eval_samples_per_second": 573.056,
      "eval_steps_per_second": 17.937,
      "step": 495
    },
    {
      "epoch": 3.25,
      "learning_rate": 2.1194805194805196e-06,
      "loss": 2.9171,
      "step": 500
    },
    {
      "epoch": 3.28,
      "learning_rate": 2.0805194805194804e-06,
      "loss": 2.9087,
      "step": 505
    },
    {
      "epoch": 3.31,
      "learning_rate": 2.0415584415584416e-06,
      "loss": 2.9181,
      "step": 510
    },
    {
      "epoch": 3.31,
      "eval_accuracy": 0.39999129223267155,
      "eval_loss": 3.754517078399658,
      "eval_runtime": 17.4146,
      "eval_samples_per_second": 574.231,
      "eval_steps_per_second": 17.973,
      "step": 510
    },
    {
      "epoch": 3.34,
      "learning_rate": 2.002597402597403e-06,
      "loss": 2.8923,
      "step": 515
    },
    {
      "epoch": 3.38,
      "learning_rate": 1.9636363636363636e-06,
      "loss": 2.8953,
      "step": 520
    },
    {
      "epoch": 3.41,
      "learning_rate": 1.924675324675325e-06,
      "loss": 2.9137,
      "step": 525
    },
    {
      "epoch": 3.41,
      "eval_accuracy": 0.4009056078021595,
      "eval_loss": 3.7548601627349854,
      "eval_runtime": 17.4295,
      "eval_samples_per_second": 573.739,
      "eval_steps_per_second": 17.958,
      "step": 525
    },
    {
      "epoch": 3.44,
      "learning_rate": 1.8857142857142858e-06,
      "loss": 2.8988,
      "step": 530
    },
    {
      "epoch": 3.47,
      "learning_rate": 1.8467532467532468e-06,
      "loss": 2.9048,
      "step": 535
    },
    {
      "epoch": 3.5,
      "learning_rate": 1.8077922077922078e-06,
      "loss": 2.8904,
      "step": 540
    },
    {
      "epoch": 3.5,
      "eval_accuracy": 0.40238592824799724,
      "eval_loss": 3.7537996768951416,
      "eval_runtime": 17.315,
      "eval_samples_per_second": 577.533,
      "eval_steps_per_second": 18.077,
      "step": 540
    },
    {
      "epoch": 3.54,
      "learning_rate": 1.7688311688311688e-06,
      "loss": 2.9169,
      "step": 545
    },
    {
      "epoch": 3.57,
      "learning_rate": 1.7298701298701297e-06,
      "loss": 2.9047,
      "step": 550
    },
    {
      "epoch": 3.6,
      "learning_rate": 1.690909090909091e-06,
      "loss": 2.8881,
      "step": 555
    },
    {
      "epoch": 3.6,
      "eval_accuracy": 0.3987286659700453,
      "eval_loss": 3.7595129013061523,
      "eval_runtime": 17.237,
      "eval_samples_per_second": 580.149,
      "eval_steps_per_second": 18.159,
      "step": 555
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.651948051948052e-06,
      "loss": 2.8934,
      "step": 560
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.612987012987013e-06,
      "loss": 2.8792,
      "step": 565
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.574025974025974e-06,
      "loss": 2.8876,
      "step": 570
    },
    {
      "epoch": 3.7,
      "eval_accuracy": 0.39803204458376873,
      "eval_loss": 3.7674362659454346,
      "eval_runtime": 16.7838,
      "eval_samples_per_second": 595.813,
      "eval_steps_per_second": 18.649,
      "step": 570
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.535064935064935e-06,
      "loss": 2.8712,
      "step": 575
    },
    {
      "epoch": 3.76,
      "learning_rate": 1.4961038961038961e-06,
      "loss": 2.8557,
      "step": 580
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.4571428571428571e-06,
      "loss": 2.8805,
      "step": 585
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.4021246952281435,
      "eval_loss": 3.73473858833313,
      "eval_runtime": 17.3707,
      "eval_samples_per_second": 575.682,
      "eval_steps_per_second": 18.019,
      "step": 585
    },
    {
      "epoch": 3.8,
      "step": 585,
      "total_flos": 3.8216290317512755e+17,
      "train_loss": 3.178084816891923,
      "train_runtime": 19361.8142,
      "train_samples_per_second": 319.258,
      "train_steps_per_second": 0.04
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.39707419017763845,
      "eval_loss": 3.7512481212615967,
      "eval_runtime": 17.386,
      "eval_samples_per_second": 575.175,
      "eval_steps_per_second": 18.003,
      "step": 585
    }
  ],
  "max_steps": 770,
  "num_train_epochs": 5,
  "total_flos": 3.8216290317512755e+17,
  "trial_name": null,
  "trial_params": null
}
|
|