|
{ |
|
"best_metric": 0.6366316676139832, |
|
"best_model_checkpoint": "./exper3_mesum5/checkpoint-2800", |
|
"epoch": 8.0, |
|
"global_step": 3440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001994186046511628, |
|
"loss": 4.954, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001988372093023256, |
|
"loss": 4.8641, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019825581395348837, |
|
"loss": 4.6647, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019767441860465116, |
|
"loss": 4.5255, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019709302325581396, |
|
"loss": 4.4509, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019651162790697676, |
|
"loss": 4.253, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019593023255813952, |
|
"loss": 4.2104, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019534883720930232, |
|
"loss": 4.0778, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019476744186046511, |
|
"loss": 3.9537, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001941860465116279, |
|
"loss": 3.895, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.19349112426035503, |
|
"eval_loss": 3.8276302814483643, |
|
"eval_runtime": 19.0606, |
|
"eval_samples_per_second": 88.665, |
|
"eval_steps_per_second": 11.122, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001936046511627907, |
|
"loss": 3.825, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001930232558139535, |
|
"loss": 3.6952, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001924418604651163, |
|
"loss": 3.4767, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001918604651162791, |
|
"loss": 3.5417, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001912790697674419, |
|
"loss": 3.4797, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019069767441860466, |
|
"loss": 3.3749, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019011627906976745, |
|
"loss": 3.4024, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00018953488372093025, |
|
"loss": 3.2742, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018895348837209304, |
|
"loss": 3.3841, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018837209302325584, |
|
"loss": 3.1174, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.3106508875739645, |
|
"eval_loss": 3.1216797828674316, |
|
"eval_runtime": 21.0487, |
|
"eval_samples_per_second": 80.29, |
|
"eval_steps_per_second": 10.072, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001877906976744186, |
|
"loss": 3.012, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001872093023255814, |
|
"loss": 3.0221, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001866279069767442, |
|
"loss": 2.9971, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000186046511627907, |
|
"loss": 3.0478, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018546511627906976, |
|
"loss": 2.9971, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018488372093023256, |
|
"loss": 2.7655, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018430232558139535, |
|
"loss": 2.703, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018372093023255815, |
|
"loss": 2.8271, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018313953488372094, |
|
"loss": 2.6716, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001825581395348837, |
|
"loss": 2.6, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.42071005917159765, |
|
"eval_loss": 2.5399255752563477, |
|
"eval_runtime": 20.8035, |
|
"eval_samples_per_second": 81.236, |
|
"eval_steps_per_second": 10.191, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001819767441860465, |
|
"loss": 2.5875, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001813953488372093, |
|
"loss": 2.6057, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00018081395348837212, |
|
"loss": 2.5459, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001802325581395349, |
|
"loss": 2.4955, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001796511627906977, |
|
"loss": 2.3718, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017906976744186048, |
|
"loss": 2.3314, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00017848837209302328, |
|
"loss": 2.3855, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00017790697674418605, |
|
"loss": 2.313, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00017732558139534884, |
|
"loss": 2.1767, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00017674418604651164, |
|
"loss": 2.256, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5159763313609468, |
|
"eval_loss": 2.176730155944824, |
|
"eval_runtime": 21.1447, |
|
"eval_samples_per_second": 79.925, |
|
"eval_steps_per_second": 10.026, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00017616279069767443, |
|
"loss": 2.2881, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00017558139534883723, |
|
"loss": 2.0996, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000175, |
|
"loss": 2.0865, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001744186046511628, |
|
"loss": 1.84, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001738372093023256, |
|
"loss": 1.7042, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00017325581395348838, |
|
"loss": 1.6687, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00017267441860465118, |
|
"loss": 1.8394, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00017209302325581395, |
|
"loss": 1.6962, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00017151162790697674, |
|
"loss": 1.7329, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00017093023255813954, |
|
"loss": 1.5441, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.585207100591716, |
|
"eval_loss": 1.8085863590240479, |
|
"eval_runtime": 21.5882, |
|
"eval_samples_per_second": 78.284, |
|
"eval_steps_per_second": 9.82, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00017034883720930233, |
|
"loss": 1.6099, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001697674418604651, |
|
"loss": 1.5833, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001691860465116279, |
|
"loss": 1.5696, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00016860465116279072, |
|
"loss": 1.3757, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00016802325581395352, |
|
"loss": 1.4537, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00016744186046511629, |
|
"loss": 1.445, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016686046511627908, |
|
"loss": 1.3975, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016627906976744188, |
|
"loss": 1.4086, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016569767441860467, |
|
"loss": 1.3367, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016511627906976747, |
|
"loss": 1.3834, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.6325443786982249, |
|
"eval_loss": 1.556496500968933, |
|
"eval_runtime": 21.486, |
|
"eval_samples_per_second": 78.656, |
|
"eval_steps_per_second": 9.867, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00016453488372093024, |
|
"loss": 1.2953, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00016395348837209303, |
|
"loss": 1.2843, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00016337209302325583, |
|
"loss": 1.1906, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00016279069767441862, |
|
"loss": 1.3458, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001622093023255814, |
|
"loss": 1.1714, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00016162790697674419, |
|
"loss": 1.191, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00016104651162790698, |
|
"loss": 1.159, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00016046511627906978, |
|
"loss": 1.2594, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00015988372093023257, |
|
"loss": 1.1533, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00015930232558139534, |
|
"loss": 1.1995, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.6763313609467455, |
|
"eval_loss": 1.3339420557022095, |
|
"eval_runtime": 19.734, |
|
"eval_samples_per_second": 85.639, |
|
"eval_steps_per_second": 10.743, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00015872093023255814, |
|
"loss": 1.0989, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00015813953488372093, |
|
"loss": 1.2864, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00015755813953488373, |
|
"loss": 1.2124, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00015697674418604652, |
|
"loss": 1.1752, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001563953488372093, |
|
"loss": 1.2127, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001558139534883721, |
|
"loss": 1.1634, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001552325581395349, |
|
"loss": 1.0915, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00015465116279069768, |
|
"loss": 1.103, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00015406976744186047, |
|
"loss": 1.0951, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00015348837209302327, |
|
"loss": 1.0845, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.6532544378698225, |
|
"eval_loss": 1.3299002647399902, |
|
"eval_runtime": 19.6334, |
|
"eval_samples_per_second": 86.078, |
|
"eval_steps_per_second": 10.798, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00015290697674418606, |
|
"loss": 1.03, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00015232558139534886, |
|
"loss": 0.9288, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00015174418604651163, |
|
"loss": 0.9963, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00015116279069767442, |
|
"loss": 1.0363, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00015058139534883722, |
|
"loss": 0.9671, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.8207, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001494186046511628, |
|
"loss": 0.6051, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00014883720930232558, |
|
"loss": 0.6698, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00014825581395348837, |
|
"loss": 0.6969, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00014767441860465117, |
|
"loss": 0.6472, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_accuracy": 0.7218934911242604, |
|
"eval_loss": 1.0679467916488647, |
|
"eval_runtime": 19.5979, |
|
"eval_samples_per_second": 86.234, |
|
"eval_steps_per_second": 10.818, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00014709302325581396, |
|
"loss": 0.6366, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00014651162790697673, |
|
"loss": 0.6353, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00014593023255813953, |
|
"loss": 0.4765, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00014534883720930232, |
|
"loss": 0.453, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00014476744186046512, |
|
"loss": 0.5234, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00014418604651162791, |
|
"loss": 0.5019, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0001436046511627907, |
|
"loss": 0.6719, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0001430232558139535, |
|
"loss": 0.5294, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0001424418604651163, |
|
"loss": 0.6135, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001418604651162791, |
|
"loss": 0.5948, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.7124260355029586, |
|
"eval_loss": 1.0286362171173096, |
|
"eval_runtime": 19.4904, |
|
"eval_samples_per_second": 86.709, |
|
"eval_steps_per_second": 10.877, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00014127906976744186, |
|
"loss": 0.6138, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00014069767441860466, |
|
"loss": 0.6543, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00014011627906976746, |
|
"loss": 0.5534, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00013953488372093025, |
|
"loss": 0.6408, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00013895348837209302, |
|
"loss": 0.4687, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00013837209302325582, |
|
"loss": 0.4635, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0001377906976744186, |
|
"loss": 0.4466, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0001372093023255814, |
|
"loss": 0.4255, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0001366279069767442, |
|
"loss": 0.5848, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00013604651162790697, |
|
"loss": 0.5565, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.7284023668639054, |
|
"eval_loss": 0.9595437049865723, |
|
"eval_runtime": 19.5222, |
|
"eval_samples_per_second": 86.568, |
|
"eval_steps_per_second": 10.859, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00013546511627906977, |
|
"loss": 0.4625, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00013488372093023256, |
|
"loss": 0.7103, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00013430232558139536, |
|
"loss": 0.5923, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00013372093023255815, |
|
"loss": 0.4913, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.00013313953488372092, |
|
"loss": 0.4915, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00013255813953488372, |
|
"loss": 0.401, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0001319767441860465, |
|
"loss": 0.4169, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0001313953488372093, |
|
"loss": 0.52, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0001308139534883721, |
|
"loss": 0.4018, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0001302325581395349, |
|
"loss": 0.4879, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.7420118343195267, |
|
"eval_loss": 0.8915188312530518, |
|
"eval_runtime": 19.4697, |
|
"eval_samples_per_second": 86.801, |
|
"eval_steps_per_second": 10.889, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.0001296511627906977, |
|
"loss": 0.527, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0001290697674418605, |
|
"loss": 0.4114, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00012848837209302326, |
|
"loss": 0.5728, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00012790697674418605, |
|
"loss": 0.347, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00012732558139534885, |
|
"loss": 0.3652, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00012674418604651164, |
|
"loss": 0.5574, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00012616279069767444, |
|
"loss": 0.4363, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0001255813953488372, |
|
"loss": 0.4769, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.000125, |
|
"loss": 0.5541, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0001244186046511628, |
|
"loss": 0.2816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.7763313609467456, |
|
"eval_loss": 0.8158556818962097, |
|
"eval_runtime": 19.4244, |
|
"eval_samples_per_second": 87.004, |
|
"eval_steps_per_second": 10.914, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0001238372093023256, |
|
"loss": 0.2724, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00012325581395348836, |
|
"loss": 0.234, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.00012267441860465116, |
|
"loss": 0.2116, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00012209302325581395, |
|
"loss": 0.2968, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00012151162790697675, |
|
"loss": 0.2539, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00012093023255813953, |
|
"loss": 0.2837, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00012034883720930233, |
|
"loss": 0.2571, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00011976744186046511, |
|
"loss": 0.2502, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0001191860465116279, |
|
"loss": 0.2629, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00011860465116279071, |
|
"loss": 0.2412, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.7911242603550296, |
|
"eval_loss": 0.776643693447113, |
|
"eval_runtime": 19.7719, |
|
"eval_samples_per_second": 85.475, |
|
"eval_steps_per_second": 10.722, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00011802325581395351, |
|
"loss": 0.2721, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00011744186046511629, |
|
"loss": 0.1876, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00011686046511627909, |
|
"loss": 0.2417, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00011627906976744187, |
|
"loss": 0.2029, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00011569767441860466, |
|
"loss": 0.1991, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00011511627906976746, |
|
"loss": 0.2763, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00011453488372093024, |
|
"loss": 0.1539, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00011395348837209304, |
|
"loss": 0.2287, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00011337209302325582, |
|
"loss": 0.2572, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00011279069767441861, |
|
"loss": 0.2015, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_accuracy": 0.7828402366863906, |
|
"eval_loss": 0.784956157207489, |
|
"eval_runtime": 19.4284, |
|
"eval_samples_per_second": 86.986, |
|
"eval_steps_per_second": 10.912, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0001122093023255814, |
|
"loss": 0.2263, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00011162790697674419, |
|
"loss": 0.2843, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00011104651162790699, |
|
"loss": 0.2371, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00011046511627906977, |
|
"loss": 0.1717, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00010988372093023256, |
|
"loss": 0.311, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00010930232558139534, |
|
"loss": 0.2567, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.00010872093023255814, |
|
"loss": 0.1739, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00010813953488372092, |
|
"loss": 0.2185, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.00010755813953488372, |
|
"loss": 0.2158, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.00010697674418604651, |
|
"loss": 0.274, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_accuracy": 0.7934911242603551, |
|
"eval_loss": 0.7361425757408142, |
|
"eval_runtime": 19.2623, |
|
"eval_samples_per_second": 87.736, |
|
"eval_steps_per_second": 11.006, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0001063953488372093, |
|
"loss": 0.1164, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0001058139534883721, |
|
"loss": 0.1515, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.0001052325581395349, |
|
"loss": 0.2399, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00010465116279069768, |
|
"loss": 0.2516, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.00010406976744186048, |
|
"loss": 0.2054, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.00010348837209302327, |
|
"loss": 0.1758, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00010290697674418605, |
|
"loss": 0.1903, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00010232558139534885, |
|
"loss": 0.1922, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00010174418604651163, |
|
"loss": 0.2019, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00010116279069767443, |
|
"loss": 0.1244, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.7911242603550296, |
|
"eval_loss": 0.7299075126647949, |
|
"eval_runtime": 19.238, |
|
"eval_samples_per_second": 87.847, |
|
"eval_steps_per_second": 11.02, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.00010058139534883721, |
|
"loss": 0.2361, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1389, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.94186046511628e-05, |
|
"loss": 0.0844, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 9.883720930232558e-05, |
|
"loss": 0.1463, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 9.825581395348838e-05, |
|
"loss": 0.09, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 9.767441860465116e-05, |
|
"loss": 0.1106, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 9.709302325581396e-05, |
|
"loss": 0.1387, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 9.651162790697675e-05, |
|
"loss": 0.1231, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 9.593023255813955e-05, |
|
"loss": 0.0734, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 9.534883720930233e-05, |
|
"loss": 0.0794, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 0.7846153846153846, |
|
"eval_loss": 0.7440704107284546, |
|
"eval_runtime": 19.1417, |
|
"eval_samples_per_second": 88.289, |
|
"eval_steps_per_second": 11.075, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 9.476744186046512e-05, |
|
"loss": 0.0885, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 9.418604651162792e-05, |
|
"loss": 0.0781, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 9.36046511627907e-05, |
|
"loss": 0.0842, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 9.30232558139535e-05, |
|
"loss": 0.0957, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 9.244186046511628e-05, |
|
"loss": 0.0561, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 9.186046511627907e-05, |
|
"loss": 0.0905, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 9.127906976744186e-05, |
|
"loss": 0.075, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 9.069767441860465e-05, |
|
"loss": 0.1165, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 9.011627906976745e-05, |
|
"loss": 0.0809, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 8.953488372093024e-05, |
|
"loss": 0.0915, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_accuracy": 0.7940828402366864, |
|
"eval_loss": 0.7614301443099976, |
|
"eval_runtime": 19.2799, |
|
"eval_samples_per_second": 87.656, |
|
"eval_steps_per_second": 10.996, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 8.895348837209302e-05, |
|
"loss": 0.1187, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 8.837209302325582e-05, |
|
"loss": 0.1082, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 8.779069767441861e-05, |
|
"loss": 0.0698, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 8.72093023255814e-05, |
|
"loss": 0.1073, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 8.662790697674419e-05, |
|
"loss": 0.0638, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.604651162790697e-05, |
|
"loss": 0.157, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.546511627906977e-05, |
|
"loss": 0.0641, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.488372093023255e-05, |
|
"loss": 0.0536, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 8.430232558139536e-05, |
|
"loss": 0.0921, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 8.372093023255814e-05, |
|
"loss": 0.0817, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_accuracy": 0.8011834319526627, |
|
"eval_loss": 0.7310301065444946, |
|
"eval_runtime": 19.2052, |
|
"eval_samples_per_second": 87.997, |
|
"eval_steps_per_second": 11.039, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 8.313953488372094e-05, |
|
"loss": 0.0876, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.255813953488373e-05, |
|
"loss": 0.0959, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 8.197674418604652e-05, |
|
"loss": 0.0945, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 8.139534883720931e-05, |
|
"loss": 0.0375, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 8.081395348837209e-05, |
|
"loss": 0.0877, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 8.023255813953489e-05, |
|
"loss": 0.053, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 7.965116279069767e-05, |
|
"loss": 0.1233, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 7.906976744186047e-05, |
|
"loss": 0.1089, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 7.848837209302326e-05, |
|
"loss": 0.1326, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 7.790697674418606e-05, |
|
"loss": 0.0561, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_accuracy": 0.806508875739645, |
|
"eval_loss": 0.722186267375946, |
|
"eval_runtime": 19.2931, |
|
"eval_samples_per_second": 87.596, |
|
"eval_steps_per_second": 10.988, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 7.732558139534884e-05, |
|
"loss": 0.1245, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 7.674418604651163e-05, |
|
"loss": 0.0414, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 7.616279069767443e-05, |
|
"loss": 0.0901, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 7.558139534883721e-05, |
|
"loss": 0.0751, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.1456, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 7.441860465116279e-05, |
|
"loss": 0.0245, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 7.383720930232558e-05, |
|
"loss": 0.0377, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 7.325581395348837e-05, |
|
"loss": 0.0435, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.267441860465116e-05, |
|
"loss": 0.0639, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 7.209302325581396e-05, |
|
"loss": 0.0165, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_accuracy": 0.8059171597633136, |
|
"eval_loss": 0.7515397667884827, |
|
"eval_runtime": 19.4193, |
|
"eval_samples_per_second": 87.027, |
|
"eval_steps_per_second": 10.917, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 7.151162790697675e-05, |
|
"loss": 0.0338, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 7.093023255813955e-05, |
|
"loss": 0.0586, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.034883720930233e-05, |
|
"loss": 0.0787, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 6.976744186046513e-05, |
|
"loss": 0.023, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 6.918604651162791e-05, |
|
"loss": 0.0681, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 6.86046511627907e-05, |
|
"loss": 0.0569, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 6.802325581395348e-05, |
|
"loss": 0.0206, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 6.744186046511628e-05, |
|
"loss": 0.0369, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 6.686046511627908e-05, |
|
"loss": 0.0526, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 6.627906976744186e-05, |
|
"loss": 0.0168, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.821301775147929, |
|
"eval_loss": 0.6687235832214355, |
|
"eval_runtime": 19.3189, |
|
"eval_samples_per_second": 87.479, |
|
"eval_steps_per_second": 10.974, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 6.569767441860465e-05, |
|
"loss": 0.0185, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 6.511627906976745e-05, |
|
"loss": 0.018, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 6.453488372093024e-05, |
|
"loss": 0.0503, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 6.395348837209303e-05, |
|
"loss": 0.0145, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 6.337209302325582e-05, |
|
"loss": 0.0139, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 6.27906976744186e-05, |
|
"loss": 0.0134, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 6.22093023255814e-05, |
|
"loss": 0.0168, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 6.162790697674418e-05, |
|
"loss": 0.0234, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 6.104651162790698e-05, |
|
"loss": 0.046, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 6.0465116279069765e-05, |
|
"loss": 0.0212, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_accuracy": 0.8248520710059172, |
|
"eval_loss": 0.6671048402786255, |
|
"eval_runtime": 19.6081, |
|
"eval_samples_per_second": 86.189, |
|
"eval_steps_per_second": 10.812, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 5.9883720930232554e-05, |
|
"loss": 0.0182, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 5.9302325581395356e-05, |
|
"loss": 0.0101, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 5.8720930232558145e-05, |
|
"loss": 0.0131, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 5.8139534883720933e-05, |
|
"loss": 0.0592, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 5.755813953488373e-05, |
|
"loss": 0.0317, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 5.697674418604652e-05, |
|
"loss": 0.0136, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 5.6395348837209306e-05, |
|
"loss": 0.024, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 5.5813953488372095e-05, |
|
"loss": 0.0233, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 5.5232558139534884e-05, |
|
"loss": 0.034, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 5.465116279069767e-05, |
|
"loss": 0.0389, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_accuracy": 0.827810650887574, |
|
"eval_loss": 0.6893125176429749, |
|
"eval_runtime": 19.5009, |
|
"eval_samples_per_second": 86.663, |
|
"eval_steps_per_second": 10.871, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 5.406976744186046e-05, |
|
"loss": 0.0227, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 5.348837209302326e-05, |
|
"loss": 0.0186, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 5.290697674418605e-05, |
|
"loss": 0.022, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 5.232558139534884e-05, |
|
"loss": 0.0373, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 5.1744186046511636e-05, |
|
"loss": 0.0144, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 5.1162790697674425e-05, |
|
"loss": 0.0112, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 5.0581395348837214e-05, |
|
"loss": 0.0331, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0139, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.941860465116279e-05, |
|
"loss": 0.0142, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 4.883720930232558e-05, |
|
"loss": 0.0087, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_accuracy": 0.8260355029585799, |
|
"eval_loss": 0.6839348077774048, |
|
"eval_runtime": 19.5465, |
|
"eval_samples_per_second": 86.461, |
|
"eval_steps_per_second": 10.846, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 4.8255813953488375e-05, |
|
"loss": 0.0089, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 4.7674418604651164e-05, |
|
"loss": 0.0168, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.709302325581396e-05, |
|
"loss": 0.009, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.0079, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 4.593023255813954e-05, |
|
"loss": 0.0092, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 4.5348837209302326e-05, |
|
"loss": 0.01, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 4.476744186046512e-05, |
|
"loss": 0.0134, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 4.418604651162791e-05, |
|
"loss": 0.0265, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 4.36046511627907e-05, |
|
"loss": 0.0079, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 4.302325581395349e-05, |
|
"loss": 0.0087, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_accuracy": 0.8319526627218935, |
|
"eval_loss": 0.6412006616592407, |
|
"eval_runtime": 19.4572, |
|
"eval_samples_per_second": 86.857, |
|
"eval_steps_per_second": 10.896, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 4.2441860465116276e-05, |
|
"loss": 0.0242, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 4.186046511627907e-05, |
|
"loss": 0.0087, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 4.127906976744187e-05, |
|
"loss": 0.0097, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 4.0697674418604655e-05, |
|
"loss": 0.0073, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 4.0116279069767444e-05, |
|
"loss": 0.0077, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.953488372093023e-05, |
|
"loss": 0.0115, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.895348837209303e-05, |
|
"loss": 0.0076, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.837209302325582e-05, |
|
"loss": 0.0092, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 3.7790697674418606e-05, |
|
"loss": 0.0071, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 3.7209302325581394e-05, |
|
"loss": 0.0077, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_accuracy": 0.8366863905325443, |
|
"eval_loss": 0.6366316676139832, |
|
"eval_runtime": 19.6422, |
|
"eval_samples_per_second": 86.039, |
|
"eval_steps_per_second": 10.793, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 3.662790697674418e-05, |
|
"loss": 0.0081, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.604651162790698e-05, |
|
"loss": 0.0077, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 3.5465116279069774e-05, |
|
"loss": 0.0068, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.0069, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 3.430232558139535e-05, |
|
"loss": 0.0078, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 3.372093023255814e-05, |
|
"loss": 0.0069, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.313953488372093e-05, |
|
"loss": 0.0075, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 3.2558139534883724e-05, |
|
"loss": 0.0088, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 3.197674418604651e-05, |
|
"loss": 0.007, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.13953488372093e-05, |
|
"loss": 0.0065, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_accuracy": 0.8272189349112427, |
|
"eval_loss": 0.6696515679359436, |
|
"eval_runtime": 19.4791, |
|
"eval_samples_per_second": 86.76, |
|
"eval_steps_per_second": 10.883, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.081395348837209e-05, |
|
"loss": 0.0276, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 3.0232558139534883e-05, |
|
"loss": 0.0064, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 2.9651162790697678e-05, |
|
"loss": 0.0064, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 2.9069767441860467e-05, |
|
"loss": 0.0271, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 2.848837209302326e-05, |
|
"loss": 0.0062, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 2.7906976744186048e-05, |
|
"loss": 0.0073, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 2.7325581395348836e-05, |
|
"loss": 0.0074, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 2.674418604651163e-05, |
|
"loss": 0.0068, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 2.616279069767442e-05, |
|
"loss": 0.007, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 2.5581395348837212e-05, |
|
"loss": 0.0061, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.8349112426035503, |
|
"eval_loss": 0.6509989500045776, |
|
"eval_runtime": 19.534, |
|
"eval_samples_per_second": 86.516, |
|
"eval_steps_per_second": 10.853, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0411, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 2.441860465116279e-05, |
|
"loss": 0.006, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 2.3837209302325582e-05, |
|
"loss": 0.0066, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.0058, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 2.2674418604651163e-05, |
|
"loss": 0.0064, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 2.2093023255813955e-05, |
|
"loss": 0.006, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.1511627906976744e-05, |
|
"loss": 0.0064, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 2.0930232558139536e-05, |
|
"loss": 0.0063, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 2.0348837209302328e-05, |
|
"loss": 0.0243, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.9767441860465116e-05, |
|
"loss": 0.0185, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_accuracy": 0.8366863905325443, |
|
"eval_loss": 0.6451619267463684, |
|
"eval_runtime": 19.7485, |
|
"eval_samples_per_second": 85.576, |
|
"eval_steps_per_second": 10.735, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.918604651162791e-05, |
|
"loss": 0.0111, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 1.8604651162790697e-05, |
|
"loss": 0.0063, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.802325581395349e-05, |
|
"loss": 0.007, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.744186046511628e-05, |
|
"loss": 0.006, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 1.686046511627907e-05, |
|
"loss": 0.0064, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.6279069767441862e-05, |
|
"loss": 0.006, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.569767441860465e-05, |
|
"loss": 0.0059, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 1.5116279069767441e-05, |
|
"loss": 0.0053, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1.4534883720930233e-05, |
|
"loss": 0.006, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.3953488372093024e-05, |
|
"loss": 0.0059, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_accuracy": 0.8378698224852071, |
|
"eval_loss": 0.6426283717155457, |
|
"eval_runtime": 19.368, |
|
"eval_samples_per_second": 87.257, |
|
"eval_steps_per_second": 10.946, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.3372093023255814e-05, |
|
"loss": 0.0059, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.2790697674418606e-05, |
|
"loss": 0.0057, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 1.2209302325581395e-05, |
|
"loss": 0.006, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.0053, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 1.1046511627906977e-05, |
|
"loss": 0.0053, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 1.0465116279069768e-05, |
|
"loss": 0.0062, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 9.883720930232558e-06, |
|
"loss": 0.0061, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 9.302325581395349e-06, |
|
"loss": 0.0061, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 8.72093023255814e-06, |
|
"loss": 0.0053, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 8.139534883720931e-06, |
|
"loss": 0.0062, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_accuracy": 0.8378698224852071, |
|
"eval_loss": 0.6398439407348633, |
|
"eval_runtime": 20.4188, |
|
"eval_samples_per_second": 82.767, |
|
"eval_steps_per_second": 10.383, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 7.558139534883721e-06, |
|
"loss": 0.0056, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 6.976744186046512e-06, |
|
"loss": 0.0058, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 6.395348837209303e-06, |
|
"loss": 0.0058, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 5.8139534883720935e-06, |
|
"loss": 0.0058, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 5.232558139534884e-06, |
|
"loss": 0.0062, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 4.651162790697674e-06, |
|
"loss": 0.0055, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 4.0697674418604655e-06, |
|
"loss": 0.0053, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 3.488372093023256e-06, |
|
"loss": 0.0057, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 2.9069767441860468e-06, |
|
"loss": 0.006, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 2.325581395348837e-06, |
|
"loss": 0.0315, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_accuracy": 0.8384615384615385, |
|
"eval_loss": 0.6396650075912476, |
|
"eval_runtime": 19.5469, |
|
"eval_samples_per_second": 86.459, |
|
"eval_steps_per_second": 10.846, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.744186046511628e-06, |
|
"loss": 0.0074, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 1.1627906976744186e-06, |
|
"loss": 0.0057, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 5.813953488372093e-07, |
|
"loss": 0.0058, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0057, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 3440, |
|
"total_flos": 4.2707785173722726e+18, |
|
"train_loss": 0.6754590564342432, |
|
"train_runtime": 2008.5618, |
|
"train_samples_per_second": 27.403, |
|
"train_steps_per_second": 1.713 |
|
} |
|
], |
|
"max_steps": 3440, |
|
"num_train_epochs": 8, |
|
"total_flos": 4.2707785173722726e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|