{
  "best_metric": 0.41527342389411354,
  "best_model_checkpoint": "/groups/claytonm/enoriega/kw_pubmed/kw_pubmed_5000_0.0003/checkpoint-375",
  "epoch": 3.407671998757571,
  "global_step": 525,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.0002984415584415584,
      "loss": 5.7783,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002964935064935065,
      "loss": 3.8225,
      "step": 10
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002945454545454545,
      "loss": 3.3158,
      "step": 15
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.38301114594218044,
      "eval_loss": 3.8663249015808105,
      "eval_runtime": 16.5167,
      "eval_samples_per_second": 605.449,
      "eval_steps_per_second": 18.951,
      "step": 15
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002925974025974026,
      "loss": 3.0324,
      "step": 20
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002906493506493506,
      "loss": 2.8348,
      "step": 25
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002887012987012987,
      "loss": 2.7061,
      "step": 30
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.38962904911180773,
      "eval_loss": 3.9842898845672607,
      "eval_runtime": 16.7828,
      "eval_samples_per_second": 595.847,
      "eval_steps_per_second": 18.65,
      "step": 30
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00028675324675324674,
      "loss": 2.6148,
      "step": 35
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00028480519480519477,
      "loss": 2.5071,
      "step": 40
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002828571428571428,
      "loss": 2.4347,
      "step": 45
    },
    {
      "epoch": 0.29,
      "eval_accuracy": 0.40025252525252525,
      "eval_loss": 3.96134614944458,
      "eval_runtime": 16.7186,
      "eval_samples_per_second": 598.137,
      "eval_steps_per_second": 18.722,
      "step": 45
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002809090909090909,
      "loss": 2.386,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002789610389610389,
      "loss": 2.3579,
      "step": 55
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.000277012987012987,
      "loss": 2.2818,
      "step": 60
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.4000348310693138,
      "eval_loss": 4.00076961517334,
      "eval_runtime": 16.4241,
      "eval_samples_per_second": 608.861,
      "eval_steps_per_second": 19.057,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002754545454545454,
      "loss": 2.2853,
      "step": 65
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002738961038961039,
      "loss": 2.4897,
      "step": 70
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0002719480519480519,
      "loss": 2.2269,
      "step": 75
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.39672587948450017,
      "eval_loss": 4.018723964691162,
      "eval_runtime": 16.4842,
      "eval_samples_per_second": 606.64,
      "eval_steps_per_second": 18.988,
      "step": 75
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00027,
      "loss": 2.1896,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00026805194805194803,
      "loss": 2.2399,
      "step": 85
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00026610389610389607,
      "loss": 2.1639,
      "step": 90
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.39946882619296414,
      "eval_loss": 4.048261642456055,
      "eval_runtime": 16.4769,
      "eval_samples_per_second": 606.909,
      "eval_steps_per_second": 18.996,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0002641558441558441,
      "loss": 2.2196,
      "step": 95
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0002622077922077922,
      "loss": 2.1771,
      "step": 100
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0002602597402597402,
      "loss": 2.096,
      "step": 105
    },
    {
      "epoch": 0.68,
      "eval_accuracy": 0.40103622431208635,
      "eval_loss": 4.030768871307373,
      "eval_runtime": 16.4452,
      "eval_samples_per_second": 608.079,
      "eval_steps_per_second": 19.033,
      "step": 105
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.0002583116883116883,
      "loss": 2.0654,
      "step": 110
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00025636363636363633,
      "loss": 2.1061,
      "step": 115
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0002544155844155844,
      "loss": 2.0405,
      "step": 120
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.3975095785440613,
      "eval_loss": 4.094750881195068,
      "eval_runtime": 16.44,
      "eval_samples_per_second": 608.272,
      "eval_steps_per_second": 19.039,
      "step": 120
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00025246753246753245,
      "loss": 2.0486,
      "step": 125
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0002505194805194805,
      "loss": 2.0271,
      "step": 130
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00024857142857142857,
      "loss": 2.0288,
      "step": 135
    },
    {
      "epoch": 0.87,
      "eval_accuracy": 0.4073058167885754,
      "eval_loss": 4.073542594909668,
      "eval_runtime": 16.4284,
      "eval_samples_per_second": 608.702,
      "eval_steps_per_second": 19.052,
      "step": 135
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.0002466233766233766,
      "loss": 1.9626,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.0002446753246753247,
      "loss": 1.9723,
      "step": 145
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.0002427272727272727,
      "loss": 1.9442,
      "step": 150
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.4082636711947057,
      "eval_loss": 4.0442705154418945,
      "eval_runtime": 16.4893,
      "eval_samples_per_second": 606.453,
      "eval_steps_per_second": 18.982,
      "step": 150
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.00024077922077922077,
      "loss": 2.0944,
      "step": 155
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.0002388311688311688,
      "loss": 1.8005,
      "step": 160
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.00023688311688311686,
      "loss": 1.7948,
      "step": 165
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.4029519331243469,
      "eval_loss": 4.067967414855957,
      "eval_runtime": 16.8157,
      "eval_samples_per_second": 594.681,
      "eval_steps_per_second": 18.614,
      "step": 165
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.00023493506493506492,
      "loss": 1.7747,
      "step": 170
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00023298701298701298,
      "loss": 1.7695,
      "step": 175
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.000231038961038961,
      "loss": 1.7509,
      "step": 180
    },
    {
      "epoch": 1.17,
      "eval_accuracy": 0.40434517589690006,
      "eval_loss": 4.150224685668945,
      "eval_runtime": 16.8831,
      "eval_samples_per_second": 592.308,
      "eval_steps_per_second": 18.539,
      "step": 180
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.00022909090909090907,
      "loss": 1.708,
      "step": 185
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.00022714285714285713,
      "loss": 1.699,
      "step": 190
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.00022519480519480519,
      "loss": 1.706,
      "step": 195
    },
    {
      "epoch": 1.27,
      "eval_accuracy": 0.40917798676419365,
      "eval_loss": 4.179904460906982,
      "eval_runtime": 16.415,
      "eval_samples_per_second": 609.2,
      "eval_steps_per_second": 19.068,
      "step": 195
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.00022324675324675322,
      "loss": 1.6846,
      "step": 200
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.00022129870129870128,
      "loss": 1.6696,
      "step": 205
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00021935064935064933,
      "loss": 1.6689,
      "step": 210
    },
    {
      "epoch": 1.36,
      "eval_accuracy": 0.4087861372344131,
      "eval_loss": 4.159827709197998,
      "eval_runtime": 16.4109,
      "eval_samples_per_second": 609.351,
      "eval_steps_per_second": 19.073,
      "step": 210
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.0002174025974025974,
      "loss": 1.687,
      "step": 215
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.00021545454545454542,
      "loss": 1.65,
      "step": 220
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.00021350649350649348,
      "loss": 1.6472,
      "step": 225
    },
    {
      "epoch": 1.46,
      "eval_accuracy": 0.40599965168930685,
      "eval_loss": 4.144142150878906,
      "eval_runtime": 16.4801,
      "eval_samples_per_second": 606.792,
      "eval_steps_per_second": 18.993,
      "step": 225
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.00021155844155844154,
      "loss": 1.634,
      "step": 230
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0002096103896103896,
      "loss": 1.6136,
      "step": 235
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.00020766233766233763,
      "loss": 1.6103,
      "step": 240
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.41266109369557646,
      "eval_loss": 4.268022537231445,
      "eval_runtime": 16.5084,
      "eval_samples_per_second": 605.753,
      "eval_steps_per_second": 18.96,
      "step": 240
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.0002057142857142857,
      "loss": 1.6145,
      "step": 245
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.00020376623376623372,
      "loss": 1.5818,
      "step": 250
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.0002018181818181818,
      "loss": 1.5862,
      "step": 255
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.41431556948798326,
      "eval_loss": 4.115560531616211,
      "eval_runtime": 16.3845,
      "eval_samples_per_second": 610.333,
      "eval_steps_per_second": 19.103,
      "step": 255
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.00019987012987012986,
      "loss": 1.5816,
      "step": 260
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.0001979220779220779,
      "loss": 1.5614,
      "step": 265
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00019597402597402598,
      "loss": 1.5722,
      "step": 270
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.4119644723092999,
      "eval_loss": 4.146672248840332,
      "eval_runtime": 16.4614,
      "eval_samples_per_second": 607.482,
      "eval_steps_per_second": 19.014,
      "step": 270
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.000194025974025974,
      "loss": 1.5387,
      "step": 275
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.00019207792207792207,
      "loss": 1.5353,
      "step": 280
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0001901298701298701,
      "loss": 1.5182,
      "step": 285
    },
    {
      "epoch": 1.85,
      "eval_accuracy": 0.414272030651341,
      "eval_loss": 4.154214382171631,
      "eval_runtime": 16.4446,
      "eval_samples_per_second": 608.101,
      "eval_steps_per_second": 19.034,
      "step": 285
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.0001881818181818182,
      "loss": 1.5075,
      "step": 290
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00018623376623376622,
      "loss": 1.5203,
      "step": 295
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.00018428571428571428,
      "loss": 1.5195,
      "step": 300
    },
    {
      "epoch": 1.94,
      "eval_accuracy": 0.41157262277951934,
      "eval_loss": 4.271289825439453,
      "eval_runtime": 16.4758,
      "eval_samples_per_second": 606.95,
      "eval_steps_per_second": 18.998,
      "step": 300
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0001823376623376623,
      "loss": 1.4973,
      "step": 305
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.0001803896103896104,
      "loss": 1.6131,
      "step": 310
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.00017844155844155843,
      "loss": 1.3535,
      "step": 315
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.41322709857192613,
      "eval_loss": 4.2467217445373535,
      "eval_runtime": 16.3639,
      "eval_samples_per_second": 611.102,
      "eval_steps_per_second": 19.127,
      "step": 315
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00017649350649350648,
      "loss": 1.3275,
      "step": 320
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.00017454545454545452,
      "loss": 1.3251,
      "step": 325
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.0001725974025974026,
      "loss": 1.3322,
      "step": 330
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.40691396725879486,
      "eval_loss": 4.347814083099365,
      "eval_runtime": 16.4522,
      "eval_samples_per_second": 607.822,
      "eval_steps_per_second": 19.025,
      "step": 330
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.00017064935064935063,
      "loss": 1.3317,
      "step": 335
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.0001687012987012987,
      "loss": 1.3386,
      "step": 340
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00016675324675324672,
      "loss": 1.3253,
      "step": 345
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.40747997213514453,
      "eval_loss": 4.348999500274658,
      "eval_runtime": 16.4842,
      "eval_samples_per_second": 606.643,
      "eval_steps_per_second": 18.988,
      "step": 345
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.0001648051948051948,
      "loss": 1.3325,
      "step": 350
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.00016285714285714284,
      "loss": 1.3262,
      "step": 355
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.0001609090909090909,
      "loss": 1.3052,
      "step": 360
    },
    {
      "epoch": 2.34,
      "eval_accuracy": 0.41448972483455243,
      "eval_loss": 4.312870025634766,
      "eval_runtime": 16.4771,
      "eval_samples_per_second": 606.902,
      "eval_steps_per_second": 18.996,
      "step": 360
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.00015896103896103893,
      "loss": 1.3058,
      "step": 365
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.00015701298701298701,
      "loss": 1.297,
      "step": 370
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.00015506493506493505,
      "loss": 1.3093,
      "step": 375
    },
    {
      "epoch": 2.43,
      "eval_accuracy": 0.41527342389411354,
      "eval_loss": 4.299270153045654,
      "eval_runtime": 16.3482,
      "eval_samples_per_second": 611.689,
      "eval_steps_per_second": 19.146,
      "step": 375
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.0001531168831168831,
      "loss": 1.2818,
      "step": 380
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.00015116883116883114,
      "loss": 1.3043,
      "step": 385
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.0001492207792207792,
      "loss": 1.2865,
      "step": 390
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.4116597004528039,
      "eval_loss": 4.3583269119262695,
      "eval_runtime": 17.7946,
      "eval_samples_per_second": 561.97,
      "eval_steps_per_second": 17.59,
      "step": 390
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.00014727272727272725,
      "loss": 1.2959,
      "step": 395
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.0001453246753246753,
      "loss": 1.2998,
      "step": 400
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.00014337662337662337,
      "loss": 1.2934,
      "step": 405
    },
    {
      "epoch": 2.63,
      "eval_accuracy": 0.4146203413444793,
      "eval_loss": 4.293191909790039,
      "eval_runtime": 16.4542,
      "eval_samples_per_second": 607.749,
      "eval_steps_per_second": 19.023,
      "step": 405
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0001414285714285714,
      "loss": 1.2983,
      "step": 410
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.00013948051948051946,
      "loss": 1.2949,
      "step": 415
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.00013753246753246752,
      "loss": 1.2967,
      "step": 420
    },
    {
      "epoch": 2.72,
      "eval_accuracy": 0.4104841518634622,
      "eval_loss": 4.251837730407715,
      "eval_runtime": 16.3722,
      "eval_samples_per_second": 610.79,
      "eval_steps_per_second": 19.118,
      "step": 420
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.00013558441558441558,
      "loss": 1.2745,
      "step": 425
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.0001336363636363636,
      "loss": 1.276,
      "step": 430
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.00013168831168831167,
      "loss": 1.2706,
      "step": 435
    },
    {
      "epoch": 2.82,
      "eval_accuracy": 0.4111807732497388,
      "eval_loss": 4.352819919586182,
      "eval_runtime": 16.376,
      "eval_samples_per_second": 610.648,
      "eval_steps_per_second": 19.113,
      "step": 435
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.00012974025974025972,
      "loss": 1.2837,
      "step": 440
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.00012779220779220778,
      "loss": 1.2779,
      "step": 445
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.00012584415584415581,
      "loss": 1.2585,
      "step": 450
    },
    {
      "epoch": 2.92,
      "eval_accuracy": 0.4151428073841867,
      "eval_loss": 4.317609786987305,
      "eval_runtime": 16.4566,
      "eval_samples_per_second": 607.659,
      "eval_steps_per_second": 19.02,
      "step": 450
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0001238961038961039,
      "loss": 1.2726,
      "step": 455
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.00012194805194805194,
      "loss": 1.2667,
      "step": 460
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.00011999999999999999,
      "loss": 1.3254,
      "step": 465
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.40573841866945315,
      "eval_loss": 4.5338358879089355,
      "eval_runtime": 16.961,
      "eval_samples_per_second": 589.587,
      "eval_steps_per_second": 18.454,
      "step": 465
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.00011805194805194805,
      "loss": 1.1489,
      "step": 470
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.00011610389610389609,
      "loss": 1.1305,
      "step": 475
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.00011415584415584415,
      "loss": 1.1389,
      "step": 480
    },
    {
      "epoch": 3.12,
      "eval_accuracy": 0.4110066179031696,
      "eval_loss": 4.393136978149414,
      "eval_runtime": 16.4067,
      "eval_samples_per_second": 609.508,
      "eval_steps_per_second": 19.078,
      "step": 480
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.0001122077922077922,
      "loss": 1.138,
      "step": 485
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.00011025974025974025,
      "loss": 1.1166,
      "step": 490
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.0001083116883116883,
      "loss": 1.149,
      "step": 495
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.4105712295367468,
      "eval_loss": 4.424084663391113,
      "eval_runtime": 16.3916,
      "eval_samples_per_second": 610.068,
      "eval_steps_per_second": 19.095,
      "step": 495
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.00010636363636363636,
      "loss": 1.1305,
      "step": 500
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0001044155844155844,
      "loss": 1.1262,
      "step": 505
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.00010246753246753246,
      "loss": 1.1432,
      "step": 510
    },
    {
      "epoch": 3.31,
      "eval_accuracy": 0.41122431208638105,
      "eval_loss": 4.4350810050964355,
      "eval_runtime": 16.3705,
      "eval_samples_per_second": 610.854,
      "eval_steps_per_second": 19.12,
      "step": 510
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.0001005194805194805,
      "loss": 1.1285,
      "step": 515
    },
    {
      "epoch": 3.38,
      "learning_rate": 9.857142857142856e-05,
      "loss": 1.1175,
      "step": 520
    },
    {
      "epoch": 3.41,
      "learning_rate": 9.662337662337661e-05,
      "loss": 1.1383,
      "step": 525
    },
    {
      "epoch": 3.41,
      "eval_accuracy": 0.40865552072448624,
      "eval_loss": 4.405033111572266,
      "eval_runtime": 16.4548,
      "eval_samples_per_second": 607.724,
      "eval_steps_per_second": 19.022,
      "step": 525
    },
    {
      "epoch": 3.41,
      "step": 525,
      "total_flos": 3.4290279623915635e+17,
      "train_loss": 1.7241914413088844,
      "train_runtime": 16805.6947,
      "train_samples_per_second": 367.816,
      "train_steps_per_second": 0.046
    },
    {
      "epoch": 3.41,
      "eval_accuracy": 0.4117903169627308,
      "eval_loss": 4.301416397094727,
      "eval_runtime": 16.4623,
      "eval_samples_per_second": 607.449,
      "eval_steps_per_second": 19.013,
      "step": 525
    }
  ],
  "max_steps": 770,
  "num_train_epochs": 5,
  "total_flos": 3.4290279623915635e+17,
  "trial_name": null,
  "trial_params": null
}