{
  "best_metric": 1.4148167371749878,
  "best_model_checkpoint": "/scratch/s3545881/dumped/translation/mt5/3734190/checkpoint-4600",
  "epoch": 18.231144872490503,
  "global_step": 5600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.5e-06, |
|
"loss": 16.635, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5e-06, |
|
"loss": 16.8135, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.5e-06, |
|
"loss": 16.2077, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1e-05, |
|
"loss": 15.7712, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.25e-05, |
|
"loss": 14.7744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5e-05, |
|
"loss": 14.0924, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 13.2153, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2e-05, |
|
"loss": 12.5009, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 11.6579, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5e-05, |
|
"loss": 10.6585, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.75e-05, |
|
"loss": 9.6627, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3e-05, |
|
"loss": 9.1927, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 8.4296, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.5000000000000004e-05, |
|
"loss": 7.9397, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.75e-05, |
|
"loss": 7.5002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4e-05, |
|
"loss": 7.0869, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.25e-05, |
|
"loss": 6.8123, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 6.4624, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.75e-05, |
|
"loss": 6.2002, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5e-05, |
|
"loss": 5.9634, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
8158, |
|
3455, |
|
1823, |
|
1029 |
|
], |
|
"eval_loss": 3.1029841899871826, |
|
"eval_precisions": [ |
|
2.863641788529988, |
|
1.2277067291121069, |
|
0.6539183086365283, |
|
0.3724954297815345 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 262.8531, |
|
"eval_samples_per_second": 13.175, |
|
"eval_score": 0.9619764567851472, |
|
"eval_steps_per_second": 13.175, |
|
"eval_sys_len": 284882, |
|
"eval_totals": [ |
|
284882, |
|
281419, |
|
278781, |
|
276245 |
|
], |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 5.25e-05, |
|
"loss": 5.8077, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.5e-05, |
|
"loss": 5.693, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.75e-05, |
|
"loss": 5.5207, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6e-05, |
|
"loss": 5.4243, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.25e-05, |
|
"loss": 5.3032, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 5.2149, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.75e-05, |
|
"loss": 5.1234, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 5.0828, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.25e-05, |
|
"loss": 4.9444, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.5e-05, |
|
"loss": 4.8644, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 7.75e-05, |
|
"loss": 4.8276, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8e-05, |
|
"loss": 4.8093, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.25e-05, |
|
"loss": 4.7373, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.5e-05, |
|
"loss": 4.6358, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.75e-05, |
|
"loss": 4.6567, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 4.4909, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.25e-05, |
|
"loss": 4.5973, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 9.5e-05, |
|
"loss": 4.4554, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.750000000000001e-05, |
|
"loss": 4.5076, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001, |
|
"loss": 4.4274, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
20993, |
|
12057, |
|
7745, |
|
5231 |
|
], |
|
"eval_loss": 2.268582582473755, |
|
"eval_precisions": [ |
|
27.83626816590644, |
|
16.75677178157964, |
|
11.30788997255154, |
|
8.037923139568832 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 232.9413, |
|
"eval_samples_per_second": 14.866, |
|
"eval_score": 14.349332444590503, |
|
"eval_steps_per_second": 14.866, |
|
"eval_sys_len": 75416, |
|
"eval_totals": [ |
|
75416, |
|
71953, |
|
68492, |
|
65079 |
|
], |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001025, |
|
"loss": 4.4082, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.000105, |
|
"loss": 4.3035, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001075, |
|
"loss": 4.3256, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00011, |
|
"loss": 4.3323, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00011250000000000001, |
|
"loss": 4.2534, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.000115, |
|
"loss": 4.291, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001175, |
|
"loss": 4.2201, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00012, |
|
"loss": 4.3005, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001225, |
|
"loss": 4.2619, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.000125, |
|
"loss": 4.1464, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0001275, |
|
"loss": 4.104, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013000000000000002, |
|
"loss": 4.1574, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00013250000000000002, |
|
"loss": 4.1, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.000135, |
|
"loss": 4.078, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001375, |
|
"loss": 4.1266, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00014000000000000001, |
|
"loss": 3.9953, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0001425, |
|
"loss": 4.0587, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.000145, |
|
"loss": 4.0022, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001475, |
|
"loss": 3.963, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00015, |
|
"loss": 4.0209, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
22518, |
|
13520, |
|
9025, |
|
6338 |
|
], |
|
"eval_loss": 2.0687694549560547, |
|
"eval_precisions": [ |
|
44.010554089709764, |
|
28.342627143515994, |
|
20.398246089865292, |
|
15.523280021553308 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 179.1653, |
|
"eval_samples_per_second": 19.329, |
|
"eval_score": 25.06936390823504, |
|
"eval_steps_per_second": 19.329, |
|
"eval_sys_len": 51165, |
|
"eval_totals": [ |
|
51165, |
|
47702, |
|
44244, |
|
40829 |
|
], |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001525, |
|
"loss": 3.922, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.000155, |
|
"loss": 3.872, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001575, |
|
"loss": 3.9436, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00016, |
|
"loss": 3.8825, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00016250000000000002, |
|
"loss": 3.8445, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.000165, |
|
"loss": 3.8438, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001675, |
|
"loss": 3.9445, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00017, |
|
"loss": 3.8441, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001725, |
|
"loss": 3.8509, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.000175, |
|
"loss": 3.8228, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0001775, |
|
"loss": 3.7646, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 3.782, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0001825, |
|
"loss": 3.8468, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.000185, |
|
"loss": 3.7436, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.0001875, |
|
"loss": 3.7842, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00019, |
|
"loss": 3.7974, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00019250000000000002, |
|
"loss": 3.7282, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00019500000000000002, |
|
"loss": 3.7219, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0001975, |
|
"loss": 3.6277, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0002, |
|
"loss": 3.7015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
24390, |
|
15185, |
|
10398, |
|
7459 |
|
], |
|
"eval_loss": 1.9690614938735962, |
|
"eval_precisions": [ |
|
54.65668698458229, |
|
36.89171788829231, |
|
27.56116309274525, |
|
21.719760060567236 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 156.3814, |
|
"eval_samples_per_second": 22.145, |
|
"eval_score": 33.14600462748512, |
|
"eval_steps_per_second": 22.145, |
|
"eval_sys_len": 44624, |
|
"eval_totals": [ |
|
44624, |
|
41161, |
|
37727, |
|
34342 |
|
], |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00020250000000000002, |
|
"loss": 3.794, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.000205, |
|
"loss": 3.6069, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002075, |
|
"loss": 3.7057, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00021, |
|
"loss": 3.6715, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002125, |
|
"loss": 3.6437, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.000215, |
|
"loss": 3.6129, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002175, |
|
"loss": 3.6232, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00022, |
|
"loss": 3.5967, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00022250000000000001, |
|
"loss": 3.5749, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00022500000000000002, |
|
"loss": 3.6316, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0002275, |
|
"loss": 3.6169, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00023, |
|
"loss": 3.5349, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0002325, |
|
"loss": 3.4972, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.000235, |
|
"loss": 3.534, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.0002375, |
|
"loss": 3.5623, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00024, |
|
"loss": 3.4621, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00024249999999999999, |
|
"loss": 3.5827, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.000245, |
|
"loss": 3.4537, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002475, |
|
"loss": 3.504, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00025, |
|
"loss": 3.5469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
24938, |
|
15828, |
|
10952, |
|
7917 |
|
], |
|
"eval_loss": 1.8684660196304321, |
|
"eval_precisions": [ |
|
52.90423861852433, |
|
36.240412135088725, |
|
27.213318424649025, |
|
21.47740220281048 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 163.2477, |
|
"eval_samples_per_second": 21.213, |
|
"eval_score": 32.53581668713074, |
|
"eval_steps_per_second": 21.213, |
|
"eval_sys_len": 47138, |
|
"eval_totals": [ |
|
47138, |
|
43675, |
|
40245, |
|
36862 |
|
], |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0002525, |
|
"loss": 3.4755, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.000255, |
|
"loss": 3.5018, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0002575, |
|
"loss": 3.4461, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00026000000000000003, |
|
"loss": 3.4587, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00026250000000000004, |
|
"loss": 3.391, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00026500000000000004, |
|
"loss": 3.4249, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002675, |
|
"loss": 3.5206, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00027, |
|
"loss": 3.5261, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.0002725, |
|
"loss": 3.4603, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.000275, |
|
"loss": 3.4359, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.0002775, |
|
"loss": 3.4632, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.00028000000000000003, |
|
"loss": 3.4216, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0002825, |
|
"loss": 3.3616, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.000285, |
|
"loss": 3.3384, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002875, |
|
"loss": 3.2927, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00029, |
|
"loss": 3.4258, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0002925, |
|
"loss": 3.4243, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.000295, |
|
"loss": 3.3272, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.00029749999999999997, |
|
"loss": 3.4568, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0003, |
|
"loss": 3.4532, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
25039, |
|
16021, |
|
11141, |
|
8061 |
|
], |
|
"eval_loss": 1.7631980180740356, |
|
"eval_precisions": [ |
|
49.4558454640621, |
|
33.96726455497604, |
|
25.47329431132248, |
|
19.976704996034893 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 193.6778, |
|
"eval_samples_per_second": 17.88, |
|
"eval_score": 30.406875331612675, |
|
"eval_steps_per_second": 17.88, |
|
"eval_sys_len": 50629, |
|
"eval_totals": [ |
|
50629, |
|
47166, |
|
43736, |
|
40352 |
|
], |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0003025, |
|
"loss": 3.3871, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.000305, |
|
"loss": 3.3748, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0003075, |
|
"loss": 3.3834, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.00031, |
|
"loss": 3.2802, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0003125, |
|
"loss": 3.3111, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.000315, |
|
"loss": 3.2518, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0003175, |
|
"loss": 3.3339, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00032, |
|
"loss": 3.2785, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00032250000000000003, |
|
"loss": 3.2316, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00032500000000000004, |
|
"loss": 3.3004, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00032750000000000005, |
|
"loss": 3.236, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00033, |
|
"loss": 3.3492, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0003325, |
|
"loss": 3.2794, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.000335, |
|
"loss": 3.1176, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0003375, |
|
"loss": 3.2454, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00034, |
|
"loss": 3.2039, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.00034250000000000003, |
|
"loss": 3.2624, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.000345, |
|
"loss": 3.2248, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0003475, |
|
"loss": 3.26, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00035, |
|
"loss": 3.2288, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26211, |
|
17123, |
|
12060, |
|
8793 |
|
], |
|
"eval_loss": 1.7274117469787598, |
|
"eval_precisions": [ |
|
58.21691135641783, |
|
41.200673724735324, |
|
31.649389843852514, |
|
25.342979017754207 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 140.7961, |
|
"eval_samples_per_second": 24.596, |
|
"eval_score": 37.242954038765475, |
|
"eval_steps_per_second": 24.596, |
|
"eval_sys_len": 45023, |
|
"eval_totals": [ |
|
45023, |
|
41560, |
|
38105, |
|
34696 |
|
], |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0003525, |
|
"loss": 3.2446, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.000355, |
|
"loss": 3.1755, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0003575, |
|
"loss": 3.274, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.00035999999999999997, |
|
"loss": 3.1518, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0003625, |
|
"loss": 3.2472, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.000365, |
|
"loss": 3.231, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0003675, |
|
"loss": 3.2377, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.00037, |
|
"loss": 3.2576, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0003725, |
|
"loss": 3.2433, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.000375, |
|
"loss": 3.2144, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0003775, |
|
"loss": 3.2166, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00038, |
|
"loss": 3.1721, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00038250000000000003, |
|
"loss": 3.1729, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00038500000000000003, |
|
"loss": 3.1772, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00038750000000000004, |
|
"loss": 3.1715, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00039000000000000005, |
|
"loss": 3.0745, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0003925, |
|
"loss": 3.1181, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.000395, |
|
"loss": 3.0892, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0003975, |
|
"loss": 3.1591, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0004, |
|
"loss": 3.1232, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
25985, |
|
17105, |
|
12124, |
|
8931 |
|
], |
|
"eval_loss": 1.6549996137619019, |
|
"eval_precisions": [ |
|
61.648873072360615, |
|
44.2138185953938, |
|
34.410921579201315, |
|
28.063725490196077 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 118.8752, |
|
"eval_samples_per_second": 29.131, |
|
"eval_score": 40.27923361043648, |
|
"eval_steps_per_second": 29.131, |
|
"eval_sys_len": 42150, |
|
"eval_totals": [ |
|
42150, |
|
38687, |
|
35233, |
|
31824 |
|
], |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0004025, |
|
"loss": 3.049, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00040500000000000003, |
|
"loss": 3.16, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0004075, |
|
"loss": 3.1724, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00041, |
|
"loss": 3.114, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0004125, |
|
"loss": 3.1586, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.000415, |
|
"loss": 3.0389, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0004175, |
|
"loss": 3.0599, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00042, |
|
"loss": 3.1169, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00042249999999999997, |
|
"loss": 3.0748, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.000425, |
|
"loss": 3.0661, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0004275, |
|
"loss": 3.125, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00043, |
|
"loss": 3.153, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0004325, |
|
"loss": 3.0269, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.000435, |
|
"loss": 3.0262, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0004375, |
|
"loss": 2.993, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.00044, |
|
"loss": 3.0263, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0004425, |
|
"loss": 2.959, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00044500000000000003, |
|
"loss": 3.0096, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00044750000000000004, |
|
"loss": 3.0877, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 3.0788, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
25630, |
|
16905, |
|
12013, |
|
8850 |
|
], |
|
"eval_loss": 1.6167092323303223, |
|
"eval_precisions": [ |
|
60.873076192285765, |
|
43.748867782924876, |
|
34.141419882907975, |
|
27.850332001132895 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 122.6416, |
|
"eval_samples_per_second": 28.237, |
|
"eval_score": 39.89109940073679, |
|
"eval_steps_per_second": 28.237, |
|
"eval_sys_len": 42104, |
|
"eval_totals": [ |
|
42104, |
|
38641, |
|
35186, |
|
31777 |
|
], |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00045250000000000005, |
|
"loss": 3.1263, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.000455, |
|
"loss": 3.0045, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.0004575, |
|
"loss": 3.0452, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.00046, |
|
"loss": 3.0684, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0004625, |
|
"loss": 3.0385, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.000465, |
|
"loss": 2.9502, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00046750000000000003, |
|
"loss": 3.019, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00047, |
|
"loss": 2.9622, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0004725, |
|
"loss": 2.9952, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.000475, |
|
"loss": 2.961, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.0004775, |
|
"loss": 2.9056, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00048, |
|
"loss": 3.03, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0004825, |
|
"loss": 2.9475, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00048499999999999997, |
|
"loss": 2.9383, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0004875, |
|
"loss": 2.9497, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00049, |
|
"loss": 2.9825, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.0004925, |
|
"loss": 2.8683, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.000495, |
|
"loss": 2.9585, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0004975, |
|
"loss": 3.0039, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.0005, |
|
"loss": 2.8811, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
27043, |
|
18237, |
|
13141, |
|
9796 |
|
], |
|
"eval_loss": 1.6076596975326538, |
|
"eval_precisions": [ |
|
62.29957611500184, |
|
45.6552760045062, |
|
36.01359313765792, |
|
29.613954472626137 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 127.9727, |
|
"eval_samples_per_second": 27.06, |
|
"eval_score": 41.733492512975715, |
|
"eval_steps_per_second": 27.06, |
|
"eval_sys_len": 43408, |
|
"eval_totals": [ |
|
43408, |
|
39945, |
|
36489, |
|
33079 |
|
], |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0005024999999999999, |
|
"loss": 2.9861, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.000505, |
|
"loss": 3.0033, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0005074999999999999, |
|
"loss": 2.8877, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00051, |
|
"loss": 2.9709, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.0005124999999999999, |
|
"loss": 2.8549, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.000515, |
|
"loss": 2.9543, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0005175, |
|
"loss": 2.9383, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.0005200000000000001, |
|
"loss": 2.8779, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0005225, |
|
"loss": 2.969, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0005250000000000001, |
|
"loss": 2.968, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.0005275, |
|
"loss": 2.8835, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.0005300000000000001, |
|
"loss": 3.0172, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0005325, |
|
"loss": 2.9274, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.000535, |
|
"loss": 2.9879, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0005375, |
|
"loss": 2.9297, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00054, |
|
"loss": 2.8153, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0005425, |
|
"loss": 2.8727, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.000545, |
|
"loss": 2.8239, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.0005475, |
|
"loss": 2.9188, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00055, |
|
"loss": 2.8291, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26097, |
|
17579, |
|
12717, |
|
9479 |
|
], |
|
"eval_loss": 1.545904278755188, |
|
"eval_precisions": [ |
|
55.22005924672027, |
|
40.13745233691805, |
|
31.5221971593585, |
|
25.666089028484784 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 165.394, |
|
"eval_samples_per_second": 20.938, |
|
"eval_score": 36.59364849906923, |
|
"eval_steps_per_second": 20.938, |
|
"eval_sys_len": 47260, |
|
"eval_totals": [ |
|
47260, |
|
43797, |
|
40343, |
|
36932 |
|
], |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0005525, |
|
"loss": 2.8052, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.000555, |
|
"loss": 2.8976, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0005575, |
|
"loss": 2.8193, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0005600000000000001, |
|
"loss": 2.9084, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.0005625000000000001, |
|
"loss": 2.8175, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.000565, |
|
"loss": 2.8522, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.0005675, |
|
"loss": 2.8453, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00057, |
|
"loss": 2.9098, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0005725, |
|
"loss": 2.7735, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.000575, |
|
"loss": 2.8275, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.0005775, |
|
"loss": 2.8569, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00058, |
|
"loss": 2.7792, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 0.0005825, |
|
"loss": 2.8173, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 0.000585, |
|
"loss": 2.8759, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0005875, |
|
"loss": 2.8538, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00059, |
|
"loss": 2.902, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.0005925, |
|
"loss": 2.7693, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0005949999999999999, |
|
"loss": 2.8489, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0005975, |
|
"loss": 2.8515, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0006, |
|
"loss": 2.8358, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_bp": 0.951493698303458, |
|
"eval_counts": [ |
|
27661, |
|
18999, |
|
13858, |
|
10389 |
|
], |
|
"eval_loss": 1.5499262809753418, |
|
"eval_precisions": [ |
|
70.17174459017225, |
|
52.839581710980084, |
|
42.60722521137586, |
|
35.65324822402965 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 72.495, |
|
"eval_samples_per_second": 47.769, |
|
"eval_score": 46.35343428916555, |
|
"eval_steps_per_second": 47.769, |
|
"eval_sys_len": 39419, |
|
"eval_totals": [ |
|
39419, |
|
35956, |
|
32525, |
|
29139 |
|
], |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.0006025000000000001, |
|
"loss": 2.8804, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.000605, |
|
"loss": 2.7958, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.0006075000000000001, |
|
"loss": 2.8646, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00061, |
|
"loss": 2.7276, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.0006125000000000001, |
|
"loss": 2.8928, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.000615, |
|
"loss": 2.8545, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.0006175000000000001, |
|
"loss": 2.7296, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.00062, |
|
"loss": 2.7535, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.0006225000000000001, |
|
"loss": 2.8058, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.000625, |
|
"loss": 2.7666, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0006274999999999999, |
|
"loss": 2.7758, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00063, |
|
"loss": 2.6975, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.0006324999999999999, |
|
"loss": 2.7532, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.000635, |
|
"loss": 2.7222, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.0006374999999999999, |
|
"loss": 2.8252, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00064, |
|
"loss": 2.8289, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0006425, |
|
"loss": 2.7046, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0006450000000000001, |
|
"loss": 2.7199, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0006475, |
|
"loss": 2.6622, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.0006500000000000001, |
|
"loss": 2.764, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26024, |
|
17669, |
|
12817, |
|
9561 |
|
], |
|
"eval_loss": 1.508206844329834, |
|
"eval_precisions": [ |
|
55.35019248356977, |
|
40.56803049088488, |
|
31.96418774003691, |
|
26.063133791298657 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 173.7893, |
|
"eval_samples_per_second": 19.926, |
|
"eval_score": 36.9826767402354, |
|
"eval_steps_per_second": 19.926, |
|
"eval_sys_len": 47017, |
|
"eval_totals": [ |
|
47017, |
|
43554, |
|
40098, |
|
36684 |
|
], |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.0006525, |
|
"loss": 2.7553, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.0006550000000000001, |
|
"loss": 2.7883, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.0006575, |
|
"loss": 2.7572, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.00066, |
|
"loss": 2.7026, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0006625, |
|
"loss": 2.7265, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.000665, |
|
"loss": 2.5958, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.0006675, |
|
"loss": 2.69, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 0.00067, |
|
"loss": 2.7464, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.0006725, |
|
"loss": 2.7263, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.000675, |
|
"loss": 2.7336, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 0.0006775, |
|
"loss": 2.7562, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.00068, |
|
"loss": 2.7315, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.0006825000000000001, |
|
"loss": 2.7706, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.0006850000000000001, |
|
"loss": 2.6668, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0006875, |
|
"loss": 2.7939, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.00069, |
|
"loss": 2.7894, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.0006925, |
|
"loss": 2.7196, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.000695, |
|
"loss": 2.6768, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0006975, |
|
"loss": 2.5136, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 0.0007, |
|
"loss": 2.6378, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_bp": 0.982521383577959, |
|
"eval_counts": [ |
|
27410, |
|
18929, |
|
13880, |
|
10452 |
|
], |
|
"eval_loss": 1.4984954595565796, |
|
"eval_precisions": [ |
|
67.40937484629384, |
|
50.88577649936826, |
|
41.13445751711466, |
|
34.460929772502475 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 88.8496, |
|
"eval_samples_per_second": 38.976, |
|
"eval_score": 46.13753405463289, |
|
"eval_steps_per_second": 38.976, |
|
"eval_sys_len": 40662, |
|
"eval_totals": [ |
|
40662, |
|
37199, |
|
33743, |
|
30330 |
|
], |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0007025, |
|
"loss": 2.6048, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.000705, |
|
"loss": 2.6136, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 0.0007075, |
|
"loss": 2.5976, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 0.00071, |
|
"loss": 2.6645, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0007125, |
|
"loss": 2.7466, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.000715, |
|
"loss": 2.6573, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.0007175, |
|
"loss": 2.6746, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0007199999999999999, |
|
"loss": 2.6083, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.0007225, |
|
"loss": 2.718, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.000725, |
|
"loss": 2.6234, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.0007275000000000001, |
|
"loss": 2.6455, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.00073, |
|
"loss": 2.6674, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 0.0007325000000000001, |
|
"loss": 2.6671, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.000735, |
|
"loss": 2.6839, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0007375000000000001, |
|
"loss": 2.7052, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.00074, |
|
"loss": 2.7401, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 0.0007425000000000001, |
|
"loss": 2.6638, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.000745, |
|
"loss": 2.5944, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.0007475000000000001, |
|
"loss": 2.6262, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 0.00075, |
|
"loss": 2.701, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26685, |
|
18421, |
|
13517, |
|
10181 |
|
], |
|
"eval_loss": 1.4741408824920654, |
|
"eval_precisions": [ |
|
63.57355568790947, |
|
47.83184461985874, |
|
38.558306709265175, |
|
32.17457257529311 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 129.3598, |
|
"eval_samples_per_second": 26.77, |
|
"eval_score": 44.071323498930454, |
|
"eval_steps_per_second": 26.77, |
|
"eval_sys_len": 41975, |
|
"eval_totals": [ |
|
41975, |
|
38512, |
|
35056, |
|
31643 |
|
], |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.0007524999999999999, |
|
"loss": 2.6454, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.000755, |
|
"loss": 2.7318, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.0007574999999999999, |
|
"loss": 2.6678, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.00076, |
|
"loss": 2.6132, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 0.0007624999999999999, |
|
"loss": 2.6956, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0007650000000000001, |
|
"loss": 2.5574, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.0007675, |
|
"loss": 2.6114, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.0007700000000000001, |
|
"loss": 2.6667, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0007725, |
|
"loss": 2.5687, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 0.0007750000000000001, |
|
"loss": 2.4745, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.0007775, |
|
"loss": 2.6286, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.0007800000000000001, |
|
"loss": 2.6025, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.0007825, |
|
"loss": 2.5623, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.000785, |
|
"loss": 2.6493, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.0007875, |
|
"loss": 2.5866, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.00079, |
|
"loss": 2.5541, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.0007925, |
|
"loss": 2.6685, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.000795, |
|
"loss": 2.5534, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.0007975, |
|
"loss": 2.6261, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.0008, |
|
"loss": 2.6547, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"eval_bp": 0.9986941383168589, |
|
"eval_counts": [ |
|
27057, |
|
18731, |
|
13763, |
|
10375 |
|
], |
|
"eval_loss": 1.4648829698562622, |
|
"eval_precisions": [ |
|
65.47368421052632, |
|
49.47176588664096, |
|
40.00174388188107, |
|
33.47422081693231 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 105.6078, |
|
"eval_samples_per_second": 32.791, |
|
"eval_score": 45.57596776445642, |
|
"eval_steps_per_second": 32.791, |
|
"eval_sys_len": 41325, |
|
"eval_totals": [ |
|
41325, |
|
37862, |
|
34406, |
|
30994 |
|
], |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 0.0008025, |
|
"loss": 2.4607, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 0.000805, |
|
"loss": 2.596, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 0.0008075000000000001, |
|
"loss": 2.5666, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.0008100000000000001, |
|
"loss": 2.5642, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 0.0008125000000000001, |
|
"loss": 2.5876, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 0.000815, |
|
"loss": 2.5194, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0008175, |
|
"loss": 2.71, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.00082, |
|
"loss": 2.5289, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.0008225, |
|
"loss": 2.5532, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.000825, |
|
"loss": 2.5693, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 0.0008275, |
|
"loss": 2.5581, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.00083, |
|
"loss": 2.5051, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 0.0008325, |
|
"loss": 2.5048, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.000835, |
|
"loss": 2.5013, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.0008375, |
|
"loss": 2.5961, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.00084, |
|
"loss": 2.5747, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0008425, |
|
"loss": 2.513, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0008449999999999999, |
|
"loss": 2.5362, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.0008475000000000001, |
|
"loss": 2.4747, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.00085, |
|
"loss": 2.4868, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
27887, |
|
19470, |
|
14351, |
|
10835 |
|
], |
|
"eval_loss": 1.463901162147522, |
|
"eval_precisions": [ |
|
65.5994918962151, |
|
49.861708666256916, |
|
40.29255692506388, |
|
33.617747440273035 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 111.3022, |
|
"eval_samples_per_second": 31.113, |
|
"eval_score": 45.87913991332588, |
|
"eval_steps_per_second": 31.113, |
|
"eval_sys_len": 42511, |
|
"eval_totals": [ |
|
42511, |
|
39048, |
|
35617, |
|
32230 |
|
], |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.0008525000000000001, |
|
"loss": 2.4984, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.000855, |
|
"loss": 2.4708, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 0.0008575000000000001, |
|
"loss": 2.4994, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.00086, |
|
"loss": 2.3728, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.0008625000000000001, |
|
"loss": 2.4748, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 0.000865, |
|
"loss": 2.4959, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 0.0008675000000000001, |
|
"loss": 2.513, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 0.00087, |
|
"loss": 2.5693, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 0.0008725000000000001, |
|
"loss": 2.5396, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 0.000875, |
|
"loss": 2.5051, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 0.0008774999999999999, |
|
"loss": 2.4765, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 0.00088, |
|
"loss": 2.4833, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.0008824999999999999, |
|
"loss": 2.509, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.000885, |
|
"loss": 2.5082, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 0.0008874999999999999, |
|
"loss": 2.5851, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 0.0008900000000000001, |
|
"loss": 2.4521, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"learning_rate": 0.0008925, |
|
"loss": 2.4873, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 0.0008950000000000001, |
|
"loss": 2.4671, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 0.0008975, |
|
"loss": 2.5156, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 0.0009000000000000001, |
|
"loss": 2.4155, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26675, |
|
18446, |
|
13573, |
|
10248 |
|
], |
|
"eval_loss": 1.4466536045074463, |
|
"eval_precisions": [ |
|
64.46350894151765, |
|
48.64836353087006, |
|
39.38655291488929, |
|
33.009083295754685 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 110.662, |
|
"eval_samples_per_second": 31.293, |
|
"eval_score": 44.93563710796045, |
|
"eval_steps_per_second": 31.293, |
|
"eval_sys_len": 41380, |
|
"eval_totals": [ |
|
41380, |
|
37917, |
|
34461, |
|
31046 |
|
], |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 0.0009025, |
|
"loss": 2.5093, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 0.0009050000000000001, |
|
"loss": 2.532, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 0.0009075, |
|
"loss": 2.5547, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.00091, |
|
"loss": 2.3751, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 0.0009125, |
|
"loss": 2.5222, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 0.000915, |
|
"loss": 2.4935, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 0.0009175, |
|
"loss": 2.5756, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.00092, |
|
"loss": 2.5278, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.0009225, |
|
"loss": 2.4911, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.000925, |
|
"loss": 2.4734, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 0.0009275, |
|
"loss": 2.3916, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 0.00093, |
|
"loss": 2.443, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.0009325000000000001, |
|
"loss": 2.3527, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 0.0009350000000000001, |
|
"loss": 2.3896, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.0009375, |
|
"loss": 2.4948, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 0.00094, |
|
"loss": 2.4026, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.0009425, |
|
"loss": 2.3684, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 0.000945, |
|
"loss": 2.434, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.0009475, |
|
"loss": 2.426, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 0.00095, |
|
"loss": 2.4975, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26470, |
|
18231, |
|
13333, |
|
10001 |
|
], |
|
"eval_loss": 1.4460936784744263, |
|
"eval_precisions": [ |
|
56.14593276063209, |
|
41.73572638615448, |
|
33.11888320333847, |
|
27.118414273705902 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 158.852, |
|
"eval_samples_per_second": 21.8, |
|
"eval_score": 38.08829771113567, |
|
"eval_steps_per_second": 21.8, |
|
"eval_sys_len": 47145, |
|
"eval_totals": [ |
|
47145, |
|
43682, |
|
40258, |
|
36879 |
|
], |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.0009525, |
|
"loss": 2.4968, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 0.000955, |
|
"loss": 2.4392, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 0.0009575, |
|
"loss": 2.4044, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 0.00096, |
|
"loss": 2.4352, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.0009625, |
|
"loss": 2.398, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 0.000965, |
|
"loss": 2.5434, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 0.0009675, |
|
"loss": 2.335, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 0.0009699999999999999, |
|
"loss": 2.41, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 0.0009725000000000001, |
|
"loss": 2.4202, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 0.000975, |
|
"loss": 2.4073, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 0.0009775, |
|
"loss": 2.3379, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 0.00098, |
|
"loss": 2.4531, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 0.0009825, |
|
"loss": 2.3775, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 0.000985, |
|
"loss": 2.4836, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0009875, |
|
"loss": 2.5058, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 0.00099, |
|
"loss": 2.4745, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 0.0009925000000000001, |
|
"loss": 2.426, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.000995, |
|
"loss": 2.4218, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0009975000000000001, |
|
"loss": 2.3397, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 0.001, |
|
"loss": 2.3011, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"eval_bp": 0.9755140388093076, |
|
"eval_counts": [ |
|
28338, |
|
19935, |
|
14788, |
|
11220 |
|
], |
|
"eval_loss": 1.4539415836334229, |
|
"eval_precisions": [ |
|
70.18178215860122, |
|
54.00243803331979, |
|
44.164377015888185, |
|
37.280701754385966 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 75.0864, |
|
"eval_samples_per_second": 46.12, |
|
"eval_score": 48.756442564740674, |
|
"eval_steps_per_second": 46.12, |
|
"eval_sys_len": 40378, |
|
"eval_totals": [ |
|
40378, |
|
36915, |
|
33484, |
|
30096 |
|
], |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 0.001, |
|
"loss": 2.4352, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.001, |
|
"loss": 2.3339, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 0.001, |
|
"loss": 2.3264, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 0.001, |
|
"loss": 2.3393, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.001, |
|
"loss": 2.4056, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 0.001, |
|
"loss": 2.3324, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 0.001, |
|
"loss": 2.3016, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 0.001, |
|
"loss": 2.3232, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.001, |
|
"loss": 2.3512, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 0.001, |
|
"loss": 2.2561, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 0.001, |
|
"loss": 2.3663, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 0.001, |
|
"loss": 2.414, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 0.001, |
|
"loss": 2.3334, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 0.001, |
|
"loss": 2.324, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.001, |
|
"loss": 2.4558, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 0.001, |
|
"loss": 2.2729, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 0.001, |
|
"loss": 2.3651, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 0.001, |
|
"loss": 2.4071, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 0.001, |
|
"loss": 2.3455, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.001, |
|
"loss": 2.383, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
26856, |
|
18659, |
|
13769, |
|
10407 |
|
], |
|
"eval_loss": 1.422828197479248, |
|
"eval_precisions": [ |
|
63.941334730125476, |
|
48.41714671233588, |
|
39.242454470316645, |
|
32.85037878787879 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 112.127, |
|
"eval_samples_per_second": 30.885, |
|
"eval_score": 44.6960720730811, |
|
"eval_steps_per_second": 30.885, |
|
"eval_sys_len": 42001, |
|
"eval_totals": [ |
|
42001, |
|
38538, |
|
35087, |
|
31680 |
|
], |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.001, |
|
"loss": 2.2856, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 0.001, |
|
"loss": 2.33, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 0.001, |
|
"loss": 2.3685, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.001, |
|
"loss": 2.3132, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 0.001, |
|
"loss": 2.3598, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 0.001, |
|
"loss": 2.3929, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.001, |
|
"loss": 2.2815, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 0.001, |
|
"loss": 2.3766, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 0.001, |
|
"loss": 2.4054, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.001, |
|
"loss": 2.3955, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 0.001, |
|
"loss": 2.3044, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 0.001, |
|
"loss": 2.2661, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.001, |
|
"loss": 2.2011, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.001, |
|
"loss": 2.2519, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.001, |
|
"loss": 2.2913, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.001, |
|
"loss": 2.284, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.001, |
|
"loss": 2.2612, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 0.001, |
|
"loss": 2.2675, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.001, |
|
"loss": 2.2262, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 0.001, |
|
"loss": 2.2696, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"eval_bp": 0.9583631202356716, |
|
"eval_counts": [ |
|
27315, |
|
19078, |
|
14092, |
|
10677 |
|
], |
|
"eval_loss": 1.4258805513381958, |
|
"eval_precisions": [ |
|
68.81912776196116, |
|
52.660925251186924, |
|
42.99880999603332, |
|
36.36208834247182 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 79.9145, |
|
"eval_samples_per_second": 43.334, |
|
"eval_score": 46.757988379346315, |
|
"eval_steps_per_second": 43.334, |
|
"eval_sys_len": 39691, |
|
"eval_totals": [ |
|
39691, |
|
36228, |
|
32773, |
|
29363 |
|
], |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 0.001, |
|
"loss": 2.3408, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"learning_rate": 0.001, |
|
"loss": 2.32, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 0.001, |
|
"loss": 2.2473, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 14.45, |
|
"learning_rate": 0.001, |
|
"loss": 2.243, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 0.001, |
|
"loss": 2.294, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 0.001, |
|
"loss": 2.2595, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.001, |
|
"loss": 2.2146, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 0.001, |
|
"loss": 2.2468, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 0.001, |
|
"loss": 2.3313, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 0.001, |
|
"loss": 2.2623, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 0.001, |
|
"loss": 2.2167, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.001, |
|
"loss": 2.3084, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 0.001, |
|
"loss": 2.2722, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 0.001, |
|
"loss": 2.2878, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 0.001, |
|
"loss": 2.2647, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.001, |
|
"loss": 2.35, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.001, |
|
"loss": 2.312, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 0.001, |
|
"loss": 2.3462, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 0.001, |
|
"loss": 2.2526, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.001, |
|
"loss": 2.2648, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_bp": 0.96840860582464, |
|
"eval_counts": [ |
|
27875, |
|
19603, |
|
14568, |
|
11071 |
|
], |
|
"eval_loss": 1.4148167371749878, |
|
"eval_precisions": [ |
|
69.52758655093285, |
|
53.5177045510388, |
|
43.91258477769404, |
|
37.19094329481322 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 80.5243, |
|
"eval_samples_per_second": 43.006, |
|
"eval_score": 48.08159289033059, |
|
"eval_steps_per_second": 43.006, |
|
"eval_sys_len": 40092, |
|
"eval_totals": [ |
|
40092, |
|
36629, |
|
33175, |
|
29768 |
|
], |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 0.001, |
|
"loss": 2.2865, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 0.001, |
|
"loss": 2.1537, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 0.001, |
|
"loss": 2.1461, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 0.001, |
|
"loss": 2.2309, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 15.14, |
|
"learning_rate": 0.001, |
|
"loss": 2.1519, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 0.001, |
|
"loss": 2.2306, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 0.001, |
|
"loss": 2.1776, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 0.001, |
|
"loss": 2.1811, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 0.001, |
|
"loss": 2.2001, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 0.001, |
|
"loss": 2.1924, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 0.001, |
|
"loss": 2.1418, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 0.001, |
|
"loss": 2.2936, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.001, |
|
"loss": 2.2203, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 0.001, |
|
"loss": 2.2136, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 0.001, |
|
"loss": 2.1555, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 0.001, |
|
"loss": 2.3135, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 0.001, |
|
"loss": 2.2464, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 0.001, |
|
"loss": 2.2005, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 0.001, |
|
"loss": 2.1989, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 0.001, |
|
"loss": 2.2123, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
28009, |
|
19746, |
|
14685, |
|
11165 |
|
], |
|
"eval_loss": 1.432946801185608, |
|
"eval_precisions": [ |
|
67.3374203630244, |
|
51.78327913563411, |
|
42.3491752220556, |
|
35.71314333237373 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 95.3413, |
|
"eval_samples_per_second": 36.322, |
|
"eval_score": 47.92142944979905, |
|
"eval_steps_per_second": 36.322, |
|
"eval_sys_len": 41595, |
|
"eval_totals": [ |
|
41595, |
|
38132, |
|
34676, |
|
31263 |
|
], |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 0.001, |
|
"loss": 2.2782, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.001, |
|
"loss": 2.2205, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 0.001, |
|
"loss": 2.212, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.001, |
|
"loss": 2.2153, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 0.001, |
|
"loss": 2.2444, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 0.001, |
|
"loss": 2.285, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 0.001, |
|
"loss": 2.2001, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 0.001, |
|
"loss": 2.3122, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 0.001, |
|
"loss": 2.2141, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 0.001, |
|
"loss": 2.1724, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 0.001, |
|
"loss": 2.2171, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 0.001, |
|
"loss": 2.1489, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 0.001, |
|
"loss": 2.1308, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 0.001, |
|
"loss": 2.1169, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 0.001, |
|
"loss": 2.0744, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 0.001, |
|
"loss": 2.1527, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 0.001, |
|
"loss": 2.1514, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.001, |
|
"loss": 2.2027, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 0.001, |
|
"loss": 2.1814, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 0.001, |
|
"loss": 2.1292, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"eval_bp": 0.9822754617987439, |
|
"eval_counts": [ |
|
27961, |
|
19662, |
|
14591, |
|
11071 |
|
], |
|
"eval_loss": 1.4380427598953247, |
|
"eval_precisions": [ |
|
68.7813637705402, |
|
52.87047245153137, |
|
43.253097764866304, |
|
36.513852242744065 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 88.9695, |
|
"eval_samples_per_second": 38.923, |
|
"eval_score": 48.08648210020106, |
|
"eval_steps_per_second": 38.923, |
|
"eval_sys_len": 40652, |
|
"eval_totals": [ |
|
40652, |
|
37189, |
|
33734, |
|
30320 |
|
], |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 0.001, |
|
"loss": 2.185, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 0.001, |
|
"loss": 2.1878, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 0.001, |
|
"loss": 2.1767, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 0.001, |
|
"loss": 2.011, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 0.001, |
|
"loss": 2.0795, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 0.001, |
|
"loss": 2.2293, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.001, |
|
"loss": 2.1918, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 0.001, |
|
"loss": 2.1273, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 0.001, |
|
"loss": 2.1646, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 0.001, |
|
"loss": 2.1413, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 0.001, |
|
"loss": 2.0892, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.001, |
|
"loss": 2.1659, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 0.001, |
|
"loss": 2.1922, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 0.001, |
|
"loss": 2.2153, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 0.001, |
|
"loss": 2.1297, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.001, |
|
"loss": 2.2311, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 0.001, |
|
"loss": 2.1413, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.001, |
|
"loss": 2.1689, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 0.001, |
|
"loss": 2.1698, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"learning_rate": 0.001, |
|
"loss": 2.0922, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"eval_bp": 0.9634606403593156, |
|
"eval_counts": [ |
|
27055, |
|
18947, |
|
14032, |
|
10647 |
|
], |
|
"eval_loss": 1.420142650604248, |
|
"eval_precisions": [ |
|
67.8172156213967, |
|
52.00790535532925, |
|
42.54699818071558, |
|
36.002434653230985 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 88.5384, |
|
"eval_samples_per_second": 39.113, |
|
"eval_score": 46.450084211438956, |
|
"eval_steps_per_second": 39.113, |
|
"eval_sys_len": 39894, |
|
"eval_totals": [ |
|
39894, |
|
36431, |
|
32980, |
|
29573 |
|
], |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.001, |
|
"loss": 2.212, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.001, |
|
"loss": 2.1306, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.001, |
|
"loss": 2.0919, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 0.001, |
|
"loss": 1.977, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 0.001, |
|
"loss": 2.0461, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 0.001, |
|
"loss": 2.1332, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 0.001, |
|
"loss": 2.116, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 0.001, |
|
"loss": 2.0639, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 0.001, |
|
"loss": 2.0844, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 0.001, |
|
"loss": 2.0423, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 0.001, |
|
"loss": 2.0831, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 0.001, |
|
"loss": 2.0964, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 0.001, |
|
"loss": 2.0526, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 0.001, |
|
"loss": 2.0584, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"learning_rate": 0.001, |
|
"loss": 2.1096, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 0.001, |
|
"loss": 2.0576, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 0.001, |
|
"loss": 2.0952, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 0.001, |
|
"loss": 2.0561, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.001, |
|
"loss": 2.1358, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 0.001, |
|
"loss": 2.066, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"eval_bp": 1.0, |
|
"eval_counts": [ |
|
27996, |
|
19728, |
|
14711, |
|
11223 |
|
], |
|
"eval_loss": 1.4336364269256592, |
|
"eval_precisions": [ |
|
66.97607655502392, |
|
51.45942562015807, |
|
42.17240489636786, |
|
35.660269445856635 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 98.8235, |
|
"eval_samples_per_second": 35.042, |
|
"eval_score": 47.71439333194693, |
|
"eval_steps_per_second": 35.042, |
|
"eval_sys_len": 41800, |
|
"eval_totals": [ |
|
41800, |
|
38337, |
|
34883, |
|
31472 |
|
], |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 0.001, |
|
"loss": 2.1324, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 0.001, |
|
"loss": 2.1562, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 0.001, |
|
"loss": 2.064, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 0.001, |
|
"loss": 2.1753, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 0.001, |
|
"loss": 2.1545, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 0.001, |
|
"loss": 2.1808, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 0.001, |
|
"loss": 1.9854, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 0.001, |
|
"loss": 2.1477, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 0.001, |
|
"loss": 2.147, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 0.001, |
|
"loss": 2.1071, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 0.001, |
|
"loss": 2.1168, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"learning_rate": 0.001, |
|
"loss": 2.0857, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.001, |
|
"loss": 2.1181, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 0.001, |
|
"loss": 2.0222, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 0.001, |
|
"loss": 1.9394, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 0.001, |
|
"loss": 2.0617, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 0.001, |
|
"loss": 1.9781, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 0.001, |
|
"loss": 1.9584, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 0.001, |
|
"loss": 1.9962, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 0.001, |
|
"loss": 1.9179, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"eval_bp": 0.9862765874121399, |
|
"eval_counts": [ |
|
28447, |
|
20181, |
|
15105, |
|
11544 |
|
], |
|
"eval_loss": 1.4454234838485718, |
|
"eval_precisions": [ |
|
69.6974151659929, |
|
54.02923538230885, |
|
44.560151041359376, |
|
37.866561700452664 |
|
], |
|
"eval_ref_len": 41379, |
|
"eval_runtime": 80.4217, |
|
"eval_samples_per_second": 43.06, |
|
"eval_score": 49.51773016565914, |
|
"eval_steps_per_second": 43.06, |
|
"eval_sys_len": 40815, |
|
"eval_totals": [ |
|
40815, |
|
37352, |
|
33898, |
|
30486 |
|
], |
|
"step": 5600 |
|
} |
|
], |
|
"max_steps": 76750, |
|
"num_train_epochs": 250, |
|
"total_flos": 5.39878684251648e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
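Note: the object above appears to be the tail of a Hugging Face Trainer "trainer_state.json"; "log_history" interleaves per-10-step training losses with periodic evaluation entries, and each "eval_score" looks like a sacreBLEU-style corpus BLEU derived from the logged n-gram match counts ("eval_counts"), candidate n-gram totals ("eval_totals"), and brevity penalty ("eval_bp"). The sketch below is a minimal, assumed-usage illustration of that relationship (the local file name "trainer_state.json" and the BLEU interpretation of the fields are assumptions, not taken from the log itself):

import json
import math

# Minimal sketch: recompute each logged "eval_score" (corpus BLEU, 0-100 scale)
# from the raw statistics stored alongside it:
#   BLEU = brevity_penalty * geometric mean of the four n-gram precisions * 100
# Assumes the state file is saved locally as "trainer_state.json" and that all
# logged precisions are non-zero (true for every eval entry in this log).
with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_score" not in entry:
        continue  # plain training-loss entries carry no BLEU statistics
    precisions = [c / t for c, t in zip(entry["eval_counts"], entry["eval_totals"])]
    geo_mean = math.exp(sum(math.log(p) for p in precisions) / len(precisions))
    bleu = 100.0 * entry["eval_bp"] * geo_mean
    print(f'step {entry["step"]:>5}: logged {entry["eval_score"]:.2f}, recomputed {bleu:.2f}')

For the final entry (step 5600) this reproduces the logged value: 0.9862765874121399 * exp(mean(log([0.696974, 0.540292, 0.445602, 0.378666]))) * 100 ≈ 49.52, matching "eval_score": 49.51773016565914.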