|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 45.0,
  "global_step": 6300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.07, "learning_rate": 0.0007936507936507937, "loss": 0.4795, "step": 10 },
    { "epoch": 0.14, "learning_rate": 0.0015873015873015873, "loss": 0.4792, "step": 20 },
    { "epoch": 0.21, "learning_rate": 0.002380952380952381, "loss": 0.4786, "step": 30 },
    { "epoch": 0.29, "learning_rate": 0.0031746031746031746, "loss": 0.4773, "step": 40 },
    { "epoch": 0.36, "learning_rate": 0.003968253968253968, "loss": 0.4743, "step": 50 },
    { "epoch": 0.43, "learning_rate": 0.004761904761904762, "loss": 0.4672, "step": 60 },
    { "epoch": 0.5, "learning_rate": 0.005555555555555556, "loss": 0.4456, "step": 70 },
    { "epoch": 0.57, "learning_rate": 0.006349206349206349, "loss": 0.3561, "step": 80 },
    { "epoch": 0.64, "learning_rate": 0.007142857142857143, "loss": 0.2459, "step": 90 },
    { "epoch": 0.71, "learning_rate": 0.007936507936507936, "loss": 0.2057, "step": 100 },
    { "epoch": 0.79, "learning_rate": 0.00873015873015873, "loss": 0.1847, "step": 110 },
    { "epoch": 0.86, "learning_rate": 0.009523809523809525, "loss": 0.1765, "step": 120 },
    { "epoch": 0.93, "learning_rate": 0.010317460317460317, "loss": 0.1404, "step": 130 },
    { "epoch": 1.0, "learning_rate": 0.011111111111111112, "loss": 0.1251, "step": 140 },
    { "epoch": 1.07, "learning_rate": 0.011904761904761904, "loss": 0.1224, "step": 150 },
    { "epoch": 1.14, "learning_rate": 0.012698412698412698, "loss": 0.1241, "step": 160 },
    { "epoch": 1.21, "learning_rate": 0.013492063492063493, "loss": 0.1137, "step": 170 },
    { "epoch": 1.29, "learning_rate": 0.014285714285714285, "loss": 0.1178, "step": 180 },
    { "epoch": 1.36, "learning_rate": 0.01507936507936508, "loss": 0.1117, "step": 190 },
    { "epoch": 1.43, "learning_rate": 0.015873015873015872, "loss": 0.1147, "step": 200 },
    { "epoch": 1.5, "learning_rate": 0.016666666666666666, "loss": 0.1049, "step": 210 },
    { "epoch": 1.57, "learning_rate": 0.01746031746031746, "loss": 0.1153, "step": 220 },
    { "epoch": 1.64, "learning_rate": 0.018253968253968255, "loss": 0.1156, "step": 230 },
    { "epoch": 1.71, "learning_rate": 0.01904761904761905, "loss": 0.1154, "step": 240 },
    { "epoch": 1.79, "learning_rate": 0.01984126984126984, "loss": 0.1049, "step": 250 },
    { "epoch": 1.86, "learning_rate": 0.020634920634920634, "loss": 0.0982, "step": 260 },
    { "epoch": 1.93, "learning_rate": 0.02142857142857143, "loss": 0.1182, "step": 270 },
    { "epoch": 2.0, "learning_rate": 0.022222222222222223, "loss": 0.1183, "step": 280 },
    { "epoch": 2.07, "learning_rate": 0.023015873015873017, "loss": 0.1131, "step": 290 },
    { "epoch": 2.14, "learning_rate": 0.023809523809523808, "loss": 0.1063, "step": 300 },
    { "epoch": 2.21, "learning_rate": 0.024603174603174603, "loss": 0.0985, "step": 310 },
    { "epoch": 2.29, "learning_rate": 0.025396825396825397, "loss": 0.1262, "step": 320 },
    { "epoch": 2.36, "learning_rate": 0.026190476190476195, "loss": 0.1319, "step": 330 },
    { "epoch": 2.43, "learning_rate": 0.026984126984126985, "loss": 0.1123, "step": 340 },
    { "epoch": 2.5, "learning_rate": 0.02777777777777778, "loss": 0.1116, "step": 350 },
    { "epoch": 2.57, "learning_rate": 0.02857142857142857, "loss": 0.1031, "step": 360 },
    { "epoch": 2.64, "learning_rate": 0.02936507936507937, "loss": 0.1021, "step": 370 },
    { "epoch": 2.71, "learning_rate": 0.03015873015873016, "loss": 0.1067, "step": 380 },
    { "epoch": 2.79, "learning_rate": 0.030952380952380953, "loss": 0.1038, "step": 390 },
    { "epoch": 2.86, "learning_rate": 0.031746031746031744, "loss": 0.1127, "step": 400 },
    { "epoch": 2.93, "learning_rate": 0.03253968253968254, "loss": 0.1008, "step": 410 },
    { "epoch": 3.0, "learning_rate": 0.03333333333333333, "loss": 0.0965, "step": 420 },
    { "epoch": 3.07, "learning_rate": 0.03412698412698413, "loss": 0.0965, "step": 430 },
    { "epoch": 3.14, "learning_rate": 0.03492063492063492, "loss": 0.095, "step": 440 },
    { "epoch": 3.21, "learning_rate": 0.03571428571428572, "loss": 0.0876, "step": 450 },
    { "epoch": 3.29, "learning_rate": 0.03650793650793651, "loss": 0.1019, "step": 460 },
    { "epoch": 3.36, "learning_rate": 0.0373015873015873, "loss": 0.0881, "step": 470 },
    { "epoch": 3.43, "learning_rate": 0.0380952380952381, "loss": 0.0901, "step": 480 },
    { "epoch": 3.5, "learning_rate": 0.03888888888888889, "loss": 0.0878, "step": 490 },
    { "epoch": 3.57, "learning_rate": 0.03968253968253968, "loss": 0.0861, "step": 500 },
    { "epoch": 3.64, "learning_rate": 0.04047619047619048, "loss": 0.0865, "step": 510 },
    { "epoch": 3.71, "learning_rate": 0.04126984126984127, "loss": 0.0818, "step": 520 },
    { "epoch": 3.79, "learning_rate": 0.04206349206349207, "loss": 0.0789, "step": 530 },
    { "epoch": 3.86, "learning_rate": 0.04285714285714286, "loss": 0.1004, "step": 540 },
    { "epoch": 3.93, "learning_rate": 0.043650793650793655, "loss": 0.0799, "step": 550 },
    { "epoch": 4.0, "learning_rate": 0.044444444444444446, "loss": 0.0818, "step": 560 },
    { "epoch": 4.07, "learning_rate": 0.045238095238095244, "loss": 0.0777, "step": 570 },
    { "epoch": 4.14, "learning_rate": 0.046031746031746035, "loss": 0.0767, "step": 580 },
    { "epoch": 4.21, "learning_rate": 0.046825396825396826, "loss": 0.0711, "step": 590 },
    { "epoch": 4.29, "learning_rate": 0.047619047619047616, "loss": 0.0723, "step": 600 },
    { "epoch": 4.36, "learning_rate": 0.048412698412698414, "loss": 0.0694, "step": 610 },
    { "epoch": 4.43, "learning_rate": 0.049206349206349205, "loss": 0.0657, "step": 620 },
    { "epoch": 4.5, "learning_rate": 0.05, "loss": 0.0567, "step": 630 },
    { "epoch": 4.57, "learning_rate": 0.049911816578483245, "loss": 0.0617, "step": 640 },
    { "epoch": 4.64, "learning_rate": 0.049823633156966494, "loss": 0.0678, "step": 650 },
    { "epoch": 4.71, "learning_rate": 0.04973544973544974, "loss": 0.0647, "step": 660 },
    { "epoch": 4.79, "learning_rate": 0.049647266313932986, "loss": 0.0543, "step": 670 },
    { "epoch": 4.86, "learning_rate": 0.04955908289241623, "loss": 0.0558, "step": 680 },
    { "epoch": 4.93, "learning_rate": 0.04947089947089947, "loss": 0.0519, "step": 690 },
    { "epoch": 5.0, "learning_rate": 0.04938271604938271, "loss": 0.0506, "step": 700 },
    { "epoch": 5.07, "learning_rate": 0.04929453262786596, "loss": 0.0583, "step": 710 },
    { "epoch": 5.14, "learning_rate": 0.049206349206349205, "loss": 0.0518, "step": 720 },
    { "epoch": 5.21, "learning_rate": 0.049118165784832454, "loss": 0.0491, "step": 730 },
    { "epoch": 5.29, "learning_rate": 0.0490299823633157, "loss": 0.0471, "step": 740 },
    { "epoch": 5.36, "learning_rate": 0.048941798941798946, "loss": 0.0488, "step": 750 },
    { "epoch": 5.43, "learning_rate": 0.04885361552028219, "loss": 0.0449, "step": 760 },
    { "epoch": 5.5, "learning_rate": 0.04876543209876544, "loss": 0.0417, "step": 770 },
    { "epoch": 5.57, "learning_rate": 0.04867724867724868, "loss": 0.0409, "step": 780 },
    { "epoch": 5.64, "learning_rate": 0.04858906525573192, "loss": 0.0485, "step": 790 },
    { "epoch": 5.71, "learning_rate": 0.048500881834215165, "loss": 0.0457, "step": 800 },
    { "epoch": 5.79, "learning_rate": 0.048412698412698414, "loss": 0.0422, "step": 810 },
    { "epoch": 5.86, "learning_rate": 0.04832451499118166, "loss": 0.0407, "step": 820 },
    { "epoch": 5.93, "learning_rate": 0.048236331569664906, "loss": 0.0394, "step": 830 },
    { "epoch": 6.0, "learning_rate": 0.04814814814814815, "loss": 0.0417, "step": 840 },
    { "epoch": 6.07, "learning_rate": 0.0480599647266314, "loss": 0.0358, "step": 850 },
    { "epoch": 6.14, "learning_rate": 0.04797178130511464, "loss": 0.0352, "step": 860 },
    { "epoch": 6.21, "learning_rate": 0.04788359788359789, "loss": 0.0389, "step": 870 },
    { "epoch": 6.29, "learning_rate": 0.04779541446208113, "loss": 0.036, "step": 880 },
    { "epoch": 6.36, "learning_rate": 0.047707231040564374, "loss": 0.034, "step": 890 },
    { "epoch": 6.43, "learning_rate": 0.047619047619047616, "loss": 0.0359, "step": 900 },
    { "epoch": 6.5, "learning_rate": 0.047530864197530866, "loss": 0.0355, "step": 910 },
    { "epoch": 6.57, "learning_rate": 0.04744268077601411, "loss": 0.03, "step": 920 },
    { "epoch": 6.64, "learning_rate": 0.04735449735449736, "loss": 0.0278, "step": 930 },
    { "epoch": 6.71, "learning_rate": 0.0472663139329806, "loss": 0.0337, "step": 940 },
    { "epoch": 6.79, "learning_rate": 0.04717813051146385, "loss": 0.0293, "step": 950 },
    { "epoch": 6.86, "learning_rate": 0.04708994708994709, "loss": 0.0302, "step": 960 },
    { "epoch": 6.93, "learning_rate": 0.04700176366843034, "loss": 0.0332, "step": 970 },
    { "epoch": 7.0, "learning_rate": 0.04691358024691358, "loss": 0.0292, "step": 980 },
    { "epoch": 7.07, "learning_rate": 0.046825396825396826, "loss": 0.0298, "step": 990 },
    { "epoch": 7.14, "learning_rate": 0.04673721340388007, "loss": 0.0269, "step": 1000 },
    { "epoch": 7.21, "learning_rate": 0.04664902998236332, "loss": 0.0236, "step": 1010 },
    { "epoch": 7.29, "learning_rate": 0.04656084656084656, "loss": 0.025, "step": 1020 },
    { "epoch": 7.36, "learning_rate": 0.04647266313932981, "loss": 0.0266, "step": 1030 },
    { "epoch": 7.43, "learning_rate": 0.04638447971781305, "loss": 0.0229, "step": 1040 },
    { "epoch": 7.5, "learning_rate": 0.0462962962962963, "loss": 0.0232, "step": 1050 },
    { "epoch": 7.57, "learning_rate": 0.04620811287477954, "loss": 0.0244, "step": 1060 },
    { "epoch": 7.64, "learning_rate": 0.04611992945326279, "loss": 0.0241, "step": 1070 },
    { "epoch": 7.71, "learning_rate": 0.046031746031746035, "loss": 0.0256, "step": 1080 },
    { "epoch": 7.79, "learning_rate": 0.04594356261022928, "loss": 0.0266, "step": 1090 },
    { "epoch": 7.86, "learning_rate": 0.04585537918871252, "loss": 0.0234, "step": 1100 },
    { "epoch": 7.93, "learning_rate": 0.04576719576719577, "loss": 0.0223, "step": 1110 },
    { "epoch": 8.0, "learning_rate": 0.04567901234567901, "loss": 0.02, "step": 1120 },
    { "epoch": 8.07, "learning_rate": 0.04559082892416226, "loss": 0.022, "step": 1130 },
    { "epoch": 8.14, "learning_rate": 0.0455026455026455, "loss": 0.0217, "step": 1140 },
    { "epoch": 8.21, "learning_rate": 0.04541446208112875, "loss": 0.0219, "step": 1150 },
    { "epoch": 8.29, "learning_rate": 0.045326278659611995, "loss": 0.0197, "step": 1160 },
    { "epoch": 8.36, "learning_rate": 0.045238095238095244, "loss": 0.0209, "step": 1170 },
    { "epoch": 8.43, "learning_rate": 0.045149911816578486, "loss": 0.0221, "step": 1180 },
    { "epoch": 8.5, "learning_rate": 0.04506172839506173, "loss": 0.0198, "step": 1190 },
    { "epoch": 8.57, "learning_rate": 0.04497354497354497, "loss": 0.0199, "step": 1200 },
    { "epoch": 8.64, "learning_rate": 0.04488536155202822, "loss": 0.0196, "step": 1210 },
    { "epoch": 8.71, "learning_rate": 0.04479717813051146, "loss": 0.019, "step": 1220 },
    { "epoch": 8.79, "learning_rate": 0.04470899470899471, "loss": 0.0195, "step": 1230 },
    { "epoch": 8.86, "learning_rate": 0.044620811287477954, "loss": 0.0196, "step": 1240 },
    { "epoch": 8.93, "learning_rate": 0.044532627865961204, "loss": 0.0184, "step": 1250 },
    { "epoch": 9.0, "learning_rate": 0.044444444444444446, "loss": 0.0205, "step": 1260 },
    { "epoch": 9.07, "learning_rate": 0.044356261022927695, "loss": 0.0191, "step": 1270 },
    { "epoch": 9.14, "learning_rate": 0.04426807760141094, "loss": 0.0184, "step": 1280 },
    { "epoch": 9.21, "learning_rate": 0.04417989417989418, "loss": 0.0211, "step": 1290 },
    { "epoch": 9.29, "learning_rate": 0.04409171075837742, "loss": 0.0184, "step": 1300 },
    { "epoch": 9.36, "learning_rate": 0.04400352733686067, "loss": 0.0188, "step": 1310 },
    { "epoch": 9.43, "learning_rate": 0.043915343915343914, "loss": 0.017, "step": 1320 },
    { "epoch": 9.5, "learning_rate": 0.043827160493827164, "loss": 0.0158, "step": 1330 },
    { "epoch": 9.57, "learning_rate": 0.043738977072310406, "loss": 0.0158, "step": 1340 },
    { "epoch": 9.64, "learning_rate": 0.043650793650793655, "loss": 0.0165, "step": 1350 },
    { "epoch": 9.71, "learning_rate": 0.0435626102292769, "loss": 0.0151, "step": 1360 },
    { "epoch": 9.79, "learning_rate": 0.04347442680776015, "loss": 0.0144, "step": 1370 },
    { "epoch": 9.86, "learning_rate": 0.04338624338624339, "loss": 0.0168, "step": 1380 },
    { "epoch": 9.93, "learning_rate": 0.04329805996472663, "loss": 0.0162, "step": 1390 },
    { "epoch": 10.0, "learning_rate": 0.043209876543209874, "loss": 0.0177, "step": 1400 },
    { "epoch": 10.07, "learning_rate": 0.04312169312169312, "loss": 0.0144, "step": 1410 },
    { "epoch": 10.14, "learning_rate": 0.043033509700176366, "loss": 0.0145, "step": 1420 },
    { "epoch": 10.21, "learning_rate": 0.042945326278659615, "loss": 0.0153, "step": 1430 },
    { "epoch": 10.29, "learning_rate": 0.04285714285714286, "loss": 0.0147, "step": 1440 },
    { "epoch": 10.36, "learning_rate": 0.04276895943562611, "loss": 0.0133, "step": 1450 },
    { "epoch": 10.43, "learning_rate": 0.04268077601410935, "loss": 0.0139, "step": 1460 },
    { "epoch": 10.5, "learning_rate": 0.0425925925925926, "loss": 0.0133, "step": 1470 },
    { "epoch": 10.57, "learning_rate": 0.04250440917107584, "loss": 0.0135, "step": 1480 },
    { "epoch": 10.64, "learning_rate": 0.04241622574955908, "loss": 0.0106, "step": 1490 },
    { "epoch": 10.71, "learning_rate": 0.042328042328042326, "loss": 0.0132, "step": 1500 },
    { "epoch": 10.79, "learning_rate": 0.042239858906525575, "loss": 0.0137, "step": 1510 },
    { "epoch": 10.86, "learning_rate": 0.04215167548500882, "loss": 0.0127, "step": 1520 },
    { "epoch": 10.93, "learning_rate": 0.04206349206349207, "loss": 0.0116, "step": 1530 },
    { "epoch": 11.0, "learning_rate": 0.04197530864197531, "loss": 0.0123, "step": 1540 },
    { "epoch": 11.07, "learning_rate": 0.04188712522045856, "loss": 0.0125, "step": 1550 },
    { "epoch": 11.14, "learning_rate": 0.0417989417989418, "loss": 0.0111, "step": 1560 },
    { "epoch": 11.21, "learning_rate": 0.04171075837742505, "loss": 0.0106, "step": 1570 },
    { "epoch": 11.29, "learning_rate": 0.04162257495590829, "loss": 0.0105, "step": 1580 },
    { "epoch": 11.36, "learning_rate": 0.041534391534391535, "loss": 0.0099, "step": 1590 },
    { "epoch": 11.43, "learning_rate": 0.04144620811287478, "loss": 0.0116, "step": 1600 },
    { "epoch": 11.5, "learning_rate": 0.04135802469135803, "loss": 0.0115, "step": 1610 },
    { "epoch": 11.57, "learning_rate": 0.04126984126984127, "loss": 0.0113, "step": 1620 },
    { "epoch": 11.64, "learning_rate": 0.04118165784832452, "loss": 0.012, "step": 1630 },
    { "epoch": 11.71, "learning_rate": 0.04109347442680776, "loss": 0.0115, "step": 1640 },
    { "epoch": 11.79, "learning_rate": 0.04100529100529101, "loss": 0.0117, "step": 1650 },
    { "epoch": 11.86, "learning_rate": 0.04091710758377425, "loss": 0.0112, "step": 1660 },
    { "epoch": 11.93, "learning_rate": 0.0408289241622575, "loss": 0.0095, "step": 1670 },
    { "epoch": 12.0, "learning_rate": 0.040740740740740744, "loss": 0.0114, "step": 1680 },
    { "epoch": 12.07, "learning_rate": 0.040652557319223986, "loss": 0.0114, "step": 1690 },
    { "epoch": 12.14, "learning_rate": 0.04056437389770723, "loss": 0.0105, "step": 1700 },
    { "epoch": 12.21, "learning_rate": 0.04047619047619048, "loss": 0.011, "step": 1710 },
    { "epoch": 12.29, "learning_rate": 0.04038800705467372, "loss": 0.0113, "step": 1720 },
    { "epoch": 12.36, "learning_rate": 0.04029982363315697, "loss": 0.0098, "step": 1730 },
    { "epoch": 12.43, "learning_rate": 0.04021164021164021, "loss": 0.0093, "step": 1740 },
    { "epoch": 12.5, "learning_rate": 0.04012345679012346, "loss": 0.0086, "step": 1750 },
    { "epoch": 12.57, "learning_rate": 0.040035273368606704, "loss": 0.0086, "step": 1760 },
    { "epoch": 12.64, "learning_rate": 0.03994708994708995, "loss": 0.0077, "step": 1770 },
    { "epoch": 12.71, "learning_rate": 0.039858906525573196, "loss": 0.0088, "step": 1780 },
    { "epoch": 12.79, "learning_rate": 0.03977072310405644, "loss": 0.0092, "step": 1790 },
    { "epoch": 12.86, "learning_rate": 0.03968253968253968, "loss": 0.0097, "step": 1800 },
    { "epoch": 12.93, "learning_rate": 0.03959435626102293, "loss": 0.0089, "step": 1810 },
    { "epoch": 13.0, "learning_rate": 0.03950617283950617, "loss": 0.007, "step": 1820 },
    { "epoch": 13.07, "learning_rate": 0.03941798941798942, "loss": 0.0071, "step": 1830 },
    { "epoch": 13.14, "learning_rate": 0.039329805996472664, "loss": 0.008, "step": 1840 },
    { "epoch": 13.21, "learning_rate": 0.03924162257495591, "loss": 0.0074, "step": 1850 },
    { "epoch": 13.29, "learning_rate": 0.039153439153439155, "loss": 0.0084, "step": 1860 },
    { "epoch": 13.36, "learning_rate": 0.039065255731922405, "loss": 0.0079, "step": 1870 },
    { "epoch": 13.43, "learning_rate": 0.03897707231040565, "loss": 0.0082, "step": 1880 },
    { "epoch": 13.5, "learning_rate": 0.03888888888888889, "loss": 0.0079, "step": 1890 },
    { "epoch": 13.57, "learning_rate": 0.03880070546737213, "loss": 0.007, "step": 1900 },
    { "epoch": 13.64, "learning_rate": 0.03871252204585538, "loss": 0.0075, "step": 1910 },
    { "epoch": 13.71, "learning_rate": 0.038624338624338624, "loss": 0.0071, "step": 1920 },
    { "epoch": 13.79, "learning_rate": 0.03853615520282187, "loss": 0.0073, "step": 1930 },
    { "epoch": 13.86, "learning_rate": 0.038447971781305115, "loss": 0.0073, "step": 1940 },
    { "epoch": 13.93, "learning_rate": 0.038359788359788365, "loss": 0.0069, "step": 1950 },
    { "epoch": 14.0, "learning_rate": 0.03827160493827161, "loss": 0.0062, "step": 1960 },
    { "epoch": 14.07, "learning_rate": 0.038183421516754856, "loss": 0.0066, "step": 1970 },
    { "epoch": 14.14, "learning_rate": 0.0380952380952381, "loss": 0.0067, "step": 1980 },
    { "epoch": 14.21, "learning_rate": 0.03800705467372134, "loss": 0.0086, "step": 1990 },
    { "epoch": 14.29, "learning_rate": 0.03791887125220458, "loss": 0.0075, "step": 2000 },
    { "epoch": 14.36, "learning_rate": 0.03783068783068783, "loss": 0.0072, "step": 2010 },
    { "epoch": 14.43, "learning_rate": 0.037742504409171075, "loss": 0.008, "step": 2020 },
    { "epoch": 14.5, "learning_rate": 0.037654320987654324, "loss": 0.0074, "step": 2030 },
    { "epoch": 14.57, "learning_rate": 0.03756613756613757, "loss": 0.0054, "step": 2040 },
    { "epoch": 14.64, "learning_rate": 0.037477954144620816, "loss": 0.0067, "step": 2050 },
    { "epoch": 14.71, "learning_rate": 0.03738977072310406, "loss": 0.0062, "step": 2060 },
    { "epoch": 14.79, "learning_rate": 0.0373015873015873, "loss": 0.0058, "step": 2070 },
    { "epoch": 14.86, "learning_rate": 0.03721340388007054, "loss": 0.0064, "step": 2080 },
    { "epoch": 14.93, "learning_rate": 0.03712522045855379, "loss": 0.0055, "step": 2090 },
    { "epoch": 15.0, "learning_rate": 0.037037037037037035, "loss": 0.0059, "step": 2100 },
    { "epoch": 15.07, "learning_rate": 0.036948853615520284, "loss": 0.0058, "step": 2110 },
    { "epoch": 15.14, "learning_rate": 0.03686067019400353, "loss": 0.006, "step": 2120 },
    { "epoch": 15.21, "learning_rate": 0.036772486772486776, "loss": 0.0055, "step": 2130 },
    { "epoch": 15.29, "learning_rate": 0.03668430335097002, "loss": 0.0065, "step": 2140 },
    { "epoch": 15.36, "learning_rate": 0.03659611992945327, "loss": 0.0063, "step": 2150 },
    { "epoch": 15.43, "learning_rate": 0.03650793650793651, "loss": 0.0059, "step": 2160 },
    { "epoch": 15.5, "learning_rate": 0.03641975308641975, "loss": 0.0052, "step": 2170 },
    { "epoch": 15.57, "learning_rate": 0.036331569664902995, "loss": 0.0056, "step": 2180 },
    { "epoch": 15.64, "learning_rate": 0.036243386243386244, "loss": 0.0052, "step": 2190 },
    { "epoch": 15.71, "learning_rate": 0.036155202821869487, "loss": 0.0056, "step": 2200 },
    { "epoch": 15.79, "learning_rate": 0.036067019400352736, "loss": 0.0051, "step": 2210 },
    { "epoch": 15.86, "learning_rate": 0.03597883597883598, "loss": 0.0055, "step": 2220 },
    { "epoch": 15.93, "learning_rate": 0.03589065255731923, "loss": 0.0059, "step": 2230 },
    { "epoch": 16.0, "learning_rate": 0.03580246913580247, "loss": 0.0061, "step": 2240 },
    { "epoch": 16.07, "learning_rate": 0.03571428571428572, "loss": 0.0061, "step": 2250 },
    { "epoch": 16.14, "learning_rate": 0.03562610229276896, "loss": 0.0054, "step": 2260 },
    { "epoch": 16.21, "learning_rate": 0.035537918871252204, "loss": 0.0064, "step": 2270 },
    { "epoch": 16.29, "learning_rate": 0.035449735449735446, "loss": 0.0055, "step": 2280 },
    { "epoch": 16.36, "learning_rate": 0.035361552028218696, "loss": 0.0052, "step": 2290 },
    { "epoch": 16.43, "learning_rate": 0.03527336860670194, "loss": 0.0051, "step": 2300 },
    { "epoch": 16.5, "learning_rate": 0.03518518518518519, "loss": 0.005, "step": 2310 },
    { "epoch": 16.57, "learning_rate": 0.03509700176366843, "loss": 0.0055, "step": 2320 },
    { "epoch": 16.64, "learning_rate": 0.03500881834215168, "loss": 0.0052, "step": 2330 },
    { "epoch": 16.71, "learning_rate": 0.03492063492063492, "loss": 0.0051, "step": 2340 },
    { "epoch": 16.79, "learning_rate": 0.03483245149911817, "loss": 0.0052, "step": 2350 },
    { "epoch": 16.86, "learning_rate": 0.03474426807760141, "loss": 0.0053, "step": 2360 },
    { "epoch": 16.93, "learning_rate": 0.034656084656084656, "loss": 0.0056, "step": 2370 },
    { "epoch": 17.0, "learning_rate": 0.0345679012345679, "loss": 0.0054, "step": 2380 },
    { "epoch": 17.07, "learning_rate": 0.03447971781305115, "loss": 0.0053, "step": 2390 },
    { "epoch": 17.14, "learning_rate": 0.03439153439153439, "loss": 0.0054, "step": 2400 },
    { "epoch": 17.21, "learning_rate": 0.03430335097001764, "loss": 0.0064, "step": 2410 },
    { "epoch": 17.29, "learning_rate": 0.03421516754850088, "loss": 0.005, "step": 2420 },
    { "epoch": 17.36, "learning_rate": 0.03412698412698413, "loss": 0.0054, "step": 2430 },
    { "epoch": 17.43, "learning_rate": 0.03403880070546737, "loss": 0.0047, "step": 2440 },
    { "epoch": 17.5, "learning_rate": 0.03395061728395062, "loss": 0.0055, "step": 2450 },
    { "epoch": 17.57, "learning_rate": 0.033862433862433865, "loss": 0.0052, "step": 2460 },
    { "epoch": 17.64, "learning_rate": 0.03377425044091711, "loss": 0.0048, "step": 2470 },
    { "epoch": 17.71, "learning_rate": 0.03368606701940035, "loss": 0.0049, "step": 2480 },
    { "epoch": 17.79, "learning_rate": 0.0335978835978836, "loss": 0.0049, "step": 2490 },
    { "epoch": 17.86, "learning_rate": 0.03350970017636684, "loss": 0.0055, "step": 2500 },
    { "epoch": 17.93, "learning_rate": 0.03342151675485009, "loss": 0.0052, "step": 2510 },
    { "epoch": 18.0, "learning_rate": 0.03333333333333333, "loss": 0.0059, "step": 2520 },
    { "epoch": 18.07, "learning_rate": 0.03324514991181658, "loss": 0.0053, "step": 2530 },
    { "epoch": 18.14, "learning_rate": 0.033156966490299825, "loss": 0.0051, "step": 2540 },
    { "epoch": 18.21, "learning_rate": 0.033068783068783074, "loss": 0.0043, "step": 2550 },
    { "epoch": 18.29, "learning_rate": 0.032980599647266316, "loss": 0.0045, "step": 2560 },
    { "epoch": 18.36, "learning_rate": 0.03289241622574956, "loss": 0.0048, "step": 2570 },
    { "epoch": 18.43, "learning_rate": 0.0328042328042328, "loss": 0.0042, "step": 2580 },
    { "epoch": 18.5, "learning_rate": 0.03271604938271605, "loss": 0.0048, "step": 2590 },
    { "epoch": 18.57, "learning_rate": 0.03262786596119929, "loss": 0.0052, "step": 2600 },
    { "epoch": 18.64, "learning_rate": 0.03253968253968254, "loss": 0.0061, "step": 2610 },
    { "epoch": 18.71, "learning_rate": 0.032451499118165784, "loss": 0.0052, "step": 2620 },
    { "epoch": 18.79, "learning_rate": 0.032363315696649034, "loss": 0.0045, "step": 2630 },
    { "epoch": 18.86, "learning_rate": 0.032275132275132276, "loss": 0.0045, "step": 2640 },
    { "epoch": 18.93, "learning_rate": 0.032186948853615525, "loss": 0.0055, "step": 2650 },
    { "epoch": 19.0, "learning_rate": 0.03209876543209877, "loss": 0.0046, "step": 2660 },
    { "epoch": 19.07, "learning_rate": 0.03201058201058201, "loss": 0.0047, "step": 2670 },
    { "epoch": 19.14, "learning_rate": 0.03192239858906525, "loss": 0.0049, "step": 2680 },
    { "epoch": 19.21, "learning_rate": 0.0318342151675485, "loss": 0.0049, "step": 2690 },
    { "epoch": 19.29, "learning_rate": 0.031746031746031744, "loss": 0.005, "step": 2700 },
    { "epoch": 19.36, "learning_rate": 0.031657848324514994, "loss": 0.0054, "step": 2710 },
    { "epoch": 19.43, "learning_rate": 0.031569664902998236, "loss": 0.0049, "step": 2720 },
    { "epoch": 19.5, "learning_rate": 0.031481481481481485, "loss": 0.0047, "step": 2730 },
    { "epoch": 19.57, "learning_rate": 0.03139329805996473, "loss": 0.0046, "step": 2740 },
    { "epoch": 19.64, "learning_rate": 0.03130511463844798, "loss": 0.0045, "step": 2750 },
    { "epoch": 19.71, "learning_rate": 0.031216931216931216, "loss": 0.0043, "step": 2760 },
    { "epoch": 19.79, "learning_rate": 0.031128747795414465, "loss": 0.0044, "step": 2770 },
    { "epoch": 19.86, "learning_rate": 0.031040564373897708, "loss": 0.0051, "step": 2780 },
    { "epoch": 19.93, "learning_rate": 0.030952380952380953, "loss": 0.0048, "step": 2790 },
    { "epoch": 20.0, "learning_rate": 0.030864197530864196, "loss": 0.0044, "step": 2800 },
    { "epoch": 20.07, "learning_rate": 0.030776014109347445, "loss": 0.0048, "step": 2810 },
    { "epoch": 20.14, "learning_rate": 0.030687830687830688, "loss": 0.0046, "step": 2820 },
    { "epoch": 20.21, "learning_rate": 0.030599647266313937, "loss": 0.0045, "step": 2830 },
    { "epoch": 20.29, "learning_rate": 0.03051146384479718, "loss": 0.004, "step": 2840 },
    { "epoch": 20.36, "learning_rate": 0.030423280423280425, "loss": 0.0043, "step": 2850 },
    { "epoch": 20.43, "learning_rate": 0.030335097001763667, "loss": 0.0052, "step": 2860 },
    { "epoch": 20.5, "learning_rate": 0.030246913580246917, "loss": 0.0053, "step": 2870 },
    { "epoch": 20.57, "learning_rate": 0.03015873015873016, "loss": 0.0053, "step": 2880 },
    { "epoch": 20.64, "learning_rate": 0.030070546737213405, "loss": 0.0048, "step": 2890 },
    { "epoch": 20.71, "learning_rate": 0.029982363315696647, "loss": 0.0056, "step": 2900 },
    { "epoch": 20.79, "learning_rate": 0.029894179894179897, "loss": 0.0045, "step": 2910 },
    { "epoch": 20.86, "learning_rate": 0.02980599647266314, "loss": 0.0048, "step": 2920 },
    { "epoch": 20.93, "learning_rate": 0.02971781305114639, "loss": 0.005, "step": 2930 },
    { "epoch": 21.0, "learning_rate": 0.02962962962962963, "loss": 0.0047, "step": 2940 },
    { "epoch": 21.07, "learning_rate": 0.029541446208112877, "loss": 0.0046, "step": 2950 },
    { "epoch": 21.14, "learning_rate": 0.02945326278659612, "loss": 0.0047, "step": 2960 },
    { "epoch": 21.21, "learning_rate": 0.02936507936507937, "loss": 0.0046, "step": 2970 },
    { "epoch": 21.29, "learning_rate": 0.02927689594356261, "loss": 0.0046, "step": 2980 },
    { "epoch": 21.36, "learning_rate": 0.029188712522045857, "loss": 0.0041, "step": 2990 },
    { "epoch": 21.43, "learning_rate": 0.0291005291005291, "loss": 0.0048, "step": 3000 },
    { "epoch": 21.5, "learning_rate": 0.029012345679012348, "loss": 0.0044, "step": 3010 },
    { "epoch": 21.57, "learning_rate": 0.02892416225749559, "loss": 0.0045, "step": 3020 },
    { "epoch": 21.64, "learning_rate": 0.02883597883597884, "loss": 0.0045, "step": 3030 },
    { "epoch": 21.71, "learning_rate": 0.028747795414462082, "loss": 0.0043, "step": 3040 },
    { "epoch": 21.79, "learning_rate": 0.028659611992945328, "loss": 0.0038, "step": 3050 },
    { "epoch": 21.86, "learning_rate": 0.02857142857142857, "loss": 0.0044, "step": 3060 },
    { "epoch": 21.93, "learning_rate": 0.02848324514991182, "loss": 0.0041, "step": 3070 },
    { "epoch": 22.0, "learning_rate": 0.028395061728395062, "loss": 0.0044, "step": 3080 },
    { "epoch": 22.07, "learning_rate": 0.028306878306878308, "loss": 0.0038, "step": 3090 },
    { "epoch": 22.14, "learning_rate": 0.02821869488536155, "loss": 0.0039, "step": 3100 },
    { "epoch": 22.21, "learning_rate": 0.0281305114638448, "loss": 0.0038, "step": 3110 },
    { "epoch": 22.29, "learning_rate": 0.028042328042328042, "loss": 0.0041, "step": 3120 },
    { "epoch": 22.36, "learning_rate": 0.02795414462081129, "loss": 0.0042, "step": 3130 },
    { "epoch": 22.43, "learning_rate": 0.027865961199294534, "loss": 0.0044, "step": 3140 },
    { "epoch": 22.5, "learning_rate": 0.02777777777777778, "loss": 0.0044, "step": 3150 },
    { "epoch": 22.57, "learning_rate": 0.027689594356261022, "loss": 0.0045, "step": 3160 },
    { "epoch": 22.64, "learning_rate": 0.02760141093474427, "loss": 0.0043, "step": 3170 },
    { "epoch": 22.71, "learning_rate": 0.027513227513227514, "loss": 0.0042, "step": 3180 },
    { "epoch": 22.79, "learning_rate": 0.02742504409171076, "loss": 0.0037, "step": 3190 },
    { "epoch": 22.86, "learning_rate": 0.027336860670194002, "loss": 0.0041, "step": 3200 },
    { "epoch": 22.93, "learning_rate": 0.02724867724867725, "loss": 0.0039, "step": 3210 },
    { "epoch": 23.0, "learning_rate": 0.027160493827160494, "loss": 0.0048, "step": 3220 },
    { "epoch": 23.07, "learning_rate": 0.027072310405643743, "loss": 0.004, "step": 3230 },
    { "epoch": 23.14, "learning_rate": 0.026984126984126985, "loss": 0.0041, "step": 3240 },
    { "epoch": 23.21, "learning_rate": 0.02689594356261023, "loss": 0.0043, "step": 3250 },
    { "epoch": 23.29, "learning_rate": 0.026807760141093474, "loss": 0.0044, "step": 3260 },
    { "epoch": 23.36, "learning_rate": 0.026719576719576723, "loss": 0.004, "step": 3270 },
    { "epoch": 23.43, "learning_rate": 0.026631393298059965, "loss": 0.0041, "step": 3280 },
    { "epoch": 23.5, "learning_rate": 0.02654320987654321, "loss": 0.0042, "step": 3290 },
    { "epoch": 23.57, "learning_rate": 0.026455026455026454, "loss": 0.004, "step": 3300 },
    { "epoch": 23.64, "learning_rate": 0.026366843033509703, "loss": 0.0038, "step": 3310 },
    { "epoch": 23.71, "learning_rate": 0.026278659611992945, "loss": 0.0043, "step": 3320 },
    { "epoch": 23.79, "learning_rate": 0.026190476190476195, "loss": 0.0043, "step": 3330 },
    { "epoch": 23.86, "learning_rate": 0.026102292768959437, "loss": 0.0042, "step": 3340 },
    { "epoch": 23.93, "learning_rate": 0.026014109347442683, "loss": 0.0037, "step": 3350 },
    { "epoch": 24.0, "learning_rate": 0.025925925925925925, "loss": 0.0032, "step": 3360 },
    { "epoch": 24.07, "learning_rate": 0.025837742504409174, "loss": 0.0038, "step": 3370 },
    { "epoch": 24.14, "learning_rate": 0.025749559082892417, "loss": 0.0044, "step": 3380 },
    { "epoch": 24.21, "learning_rate": 0.025661375661375663, "loss": 0.0035, "step": 3390 },
    { "epoch": 24.29, "learning_rate": 0.025573192239858905, "loss": 0.0035, "step": 3400 },
    { "epoch": 24.36, "learning_rate": 0.025485008818342154, "loss": 0.0036, "step": 3410 },
    { "epoch": 24.43, "learning_rate": 0.025396825396825397, "loss": 0.0035, "step": 3420 },
    { "epoch": 24.5, "learning_rate": 0.025308641975308646, "loss": 0.0037, "step": 3430 },
    { "epoch": 24.57, "learning_rate": 0.02522045855379189, "loss": 0.0038, "step": 3440 },
    { "epoch": 24.64, "learning_rate": 0.025132275132275134, "loss": 0.0042, "step": 3450 },
    { "epoch": 24.71, "learning_rate": 0.025044091710758377, "loss": 0.0035, "step": 3460 },
    { "epoch": 24.79, "learning_rate": 0.024955908289241623, "loss": 0.0047, "step": 3470 },
    { "epoch": 24.86, "learning_rate": 0.02486772486772487, "loss": 0.0041, "step": 3480 },
    { "epoch": 24.93, "learning_rate": 0.024779541446208114, "loss": 0.0043, "step": 3490 },
    { "epoch": 25.0, "learning_rate": 0.024691358024691357, "loss": 0.0039, "step": 3500 },
    { "epoch": 25.07, "learning_rate": 0.024603174603174603, "loss": 0.0041, "step": 3510 },
    { "epoch": 25.14, "learning_rate": 0.02451499118165785, "loss": 0.0042, "step": 3520 },
    { "epoch": 25.21, "learning_rate": 0.024426807760141094, "loss": 0.0041, "step": 3530 },
    { "epoch": 25.29, "learning_rate": 0.02433862433862434, "loss": 0.0037, "step": 3540 },
    { "epoch": 25.36, "learning_rate": 0.024250440917107582, "loss": 0.0042, "step": 3550 },
    { "epoch": 25.43, "learning_rate": 0.02416225749559083, "loss": 0.0035, "step": 3560 },
    { "epoch": 25.5, "learning_rate": 0.024074074074074074, "loss": 0.0042, "step": 3570 },
    { "epoch": 25.57, "learning_rate": 0.02398589065255732, "loss": 0.0039, "step": 3580 },
    { "epoch": 25.64, "learning_rate": 0.023897707231040566, "loss": 0.0039, "step": 3590 },
    { "epoch": 25.71, "learning_rate": 0.023809523809523808, "loss": 0.0035, "step": 3600 },
    { "epoch": 25.79, "learning_rate": 0.023721340388007054, "loss": 0.0035, "step": 3610 },
    { "epoch": 25.86, "learning_rate": 0.0236331569664903, "loss": 0.0041, "step": 3620 },
    { "epoch": 25.93, "learning_rate": 0.023544973544973546, "loss": 0.004, "step": 3630 },
    { "epoch": 26.0, "learning_rate": 0.02345679012345679, "loss": 0.0034, "step": 3640 },
    { "epoch": 26.07, "learning_rate": 0.023368606701940034, "loss": 0.0039, "step": 3650 },
    { "epoch": 26.14, "learning_rate": 0.02328042328042328, "loss": 0.0037, "step": 3660 },
    { "epoch": 26.21, "learning_rate": 0.023192239858906526, "loss": 0.0042, "step": 3670 },
    { "epoch": 26.29, "learning_rate": 0.02310405643738977, "loss": 0.0034, "step": 3680 },
    { "epoch": 26.36, "learning_rate": 0.023015873015873017, "loss": 0.0036, "step": 3690 },
    { "epoch": 26.43, "learning_rate": 0.02292768959435626, "loss": 0.0034, "step": 3700 },
    { "epoch": 26.5, "learning_rate": 0.022839506172839506, "loss": 0.004, "step": 3710 },
    { "epoch": 26.57, "learning_rate": 0.02275132275132275, "loss": 0.0037, "step": 3720 },
    { "epoch": 26.64, "learning_rate": 0.022663139329805997, "loss": 0.0039, "step": 3730 },
    { "epoch": 26.71, "learning_rate": 0.022574955908289243, "loss": 0.0034, "step": 3740 },
    { "epoch": 26.79, "learning_rate": 0.022486772486772486, "loss": 0.0035, "step": 3750 },
    { "epoch": 26.86, "learning_rate": 0.02239858906525573, "loss": 0.0035, "step": 3760 },
    { "epoch": 26.93, "learning_rate": 0.022310405643738977, "loss": 0.0039, "step": 3770 },
    { "epoch": 27.0, "learning_rate": 0.022222222222222223, "loss": 0.0035, "step": 3780 },
    { "epoch": 27.07, "learning_rate": 0.02213403880070547, "loss": 0.0038, "step": 3790 },
    { "epoch": 27.14, "learning_rate": 0.02204585537918871, "loss": 0.0039, "step": 3800 },
    { "epoch": 27.21, "learning_rate": 0.021957671957671957, "loss": 0.0033, "step": 3810 },
    { "epoch": 27.29, "learning_rate": 0.021869488536155203, "loss": 0.0035, "step": 3820 },
    { "epoch": 27.36, "learning_rate": 0.02178130511463845, "loss": 0.0038, "step": 3830 },
    { "epoch": 27.43, "learning_rate": 0.021693121693121695, "loss": 0.0032, "step": 3840 },
    { "epoch": 27.5, "learning_rate": 0.021604938271604937, "loss": 0.0042, "step": 3850 },
    { "epoch": 27.57, "learning_rate": 0.021516754850088183, "loss": 0.0036, "step": 3860 },
    { "epoch": 27.64, "learning_rate": 0.02142857142857143, "loss": 0.0034, "step": 3870 },
    { "epoch": 27.71, "learning_rate": 0.021340388007054675, "loss": 0.0037, "step": 3880 },
    { "epoch": 27.79, "learning_rate": 0.02125220458553792, "loss": 0.0036, "step": 3890 },
    { "epoch": 27.86, "learning_rate": 0.021164021164021163, "loss": 0.0033, "step": 3900 },
    { "epoch": 27.93, "learning_rate": 0.02107583774250441, "loss": 0.0035, "step": 3910 },
    { "epoch": 28.0, "learning_rate": 0.020987654320987655, "loss": 0.0034, "step": 3920 },
    { "epoch": 28.07, "learning_rate": 0.0208994708994709, "loss": 0.0037, "step": 3930 },
    { "epoch": 28.14, "learning_rate": 0.020811287477954146, "loss": 0.003, "step": 3940 },
    { "epoch": 28.21, "learning_rate": 0.02072310405643739, "loss": 0.0033, "step": 3950 },
    { "epoch": 28.29, "learning_rate": 0.020634920634920634, "loss": 0.004, "step": 3960 },
    { "epoch": 28.36, "learning_rate": 0.02054673721340388, "loss": 0.0036, "step": 3970 },
    { "epoch": 28.43, "learning_rate": 0.020458553791887126, "loss": 0.0034, "step": 3980 },
    { "epoch": 28.5, "learning_rate": 0.020370370370370372, "loss": 0.0035, "step": 3990 },
    { "epoch": 28.57, "learning_rate": 0.020282186948853614, "loss": 0.0034, "step": 4000 },
    { "epoch": 28.64, "learning_rate": 0.02019400352733686, "loss": 0.0035, "step": 4010 },
    { "epoch": 28.71, "learning_rate": 0.020105820105820106, "loss": 0.0037, "step": 4020 },
    { "epoch": 28.79, "learning_rate": 0.020017636684303352, "loss": 0.0033, "step": 4030 },
    { "epoch": 28.86, "learning_rate": 0.019929453262786598, "loss": 0.0037, "step": 4040 },
    { "epoch": 28.93, "learning_rate": 0.01984126984126984, "loss": 0.0033, "step": 4050 },
    { "epoch": 29.0, "learning_rate": 0.019753086419753086, "loss": 0.0032, "step": 4060 },
    { "epoch": 29.07, "learning_rate": 0.019664902998236332, "loss": 0.0034, "step": 4070 },
    { "epoch": 29.14, "learning_rate": 0.019576719576719578, "loss": 0.0036, "step": 4080 },
    { "epoch": 29.21, "learning_rate": 0.019488536155202824, "loss": 0.0032, "step": 4090 },
    { "epoch": 29.29, "learning_rate": 0.019400352733686066, "loss": 0.003, "step": 4100 },
    { "epoch": 29.36, "learning_rate": 0.019312169312169312, "loss": 0.0033, "step": 4110 },
    { "epoch": 29.43, "learning_rate": 0.019223985890652558, "loss": 0.003, "step": 4120 },
    { "epoch": 29.5, "learning_rate": 0.019135802469135803, "loss": 0.0034, "step": 4130 },
    { "epoch": 29.57, "learning_rate": 0.01904761904761905, "loss": 0.0032, "step": 4140 },
    { "epoch": 29.64, "learning_rate": 0.01895943562610229, "loss": 0.0031, "step": 4150 },
    { "epoch": 29.71, "learning_rate": 0.018871252204585538, "loss": 0.0031, "step": 4160 },
    { "epoch": 29.79, "learning_rate": 0.018783068783068783, "loss": 0.003, "step": 4170 },
    { "epoch": 29.86, "learning_rate": 0.01869488536155203, "loss": 0.0034, "step": 4180 },
    { "epoch": 29.93, "learning_rate": 0.01860670194003527, "loss": 0.0037, "step": 4190 },
    { "epoch": 30.0, "learning_rate": 0.018518518518518517, "loss": 0.0031, "step": 4200 },
    { "epoch": 30.07, "learning_rate": 0.018430335097001763, "loss": 0.0032, "step": 4210 },
    { "epoch": 30.14, "learning_rate": 0.01834215167548501, "loss": 0.0032, "step": 4220 },
    { "epoch": 30.21, "learning_rate": 0.018253968253968255, "loss": 0.003, "step": 4230 },
    { "epoch": 30.29, "learning_rate": 0.018165784832451497, "loss": 0.0033, "step": 4240 },
    { "epoch": 30.36, "learning_rate": 0.018077601410934743, "loss": 0.0033, "step": 4250 },
    { "epoch": 30.43, "learning_rate": 0.01798941798941799, "loss": 0.003, "step": 4260 },
    { "epoch": 30.5, "learning_rate": 0.017901234567901235, "loss": 0.0034, "step": 4270 },
    { "epoch": 30.57, "learning_rate": 0.01781305114638448, "loss": 0.0033, "step": 4280 },
    { "epoch": 30.64, "learning_rate": 0.017724867724867723, "loss": 0.0036, "step": 4290 },
    { "epoch": 30.71, "learning_rate": 0.01763668430335097, "loss": 0.0035, "step": 4300 },
    { "epoch": 30.79, "learning_rate": 0.017548500881834215, "loss": 0.0034, "step": 4310 },
    { "epoch": 30.86, "learning_rate": 0.01746031746031746, "loss": 0.0034, "step": 4320 },
    { "epoch": 30.93, "learning_rate": 0.017372134038800707, "loss": 0.0036, "step": 4330 },
    { "epoch": 31.0, "learning_rate": 0.01728395061728395, "loss": 0.0036, "step": 4340 },
    { "epoch": 31.07, "learning_rate": 0.017195767195767195, "loss": 0.0035, "step": 4350 },
    { "epoch": 31.14, "learning_rate": 0.01710758377425044, "loss": 0.0035, "step": 4360 },
    { "epoch": 31.21, "learning_rate": 0.017019400352733687, "loss": 0.0034, "step": 4370 },
    { "epoch": 31.29, "learning_rate": 0.016931216931216932, "loss": 0.0035, "step": 4380 },
    { "epoch": 31.36, "learning_rate": 0.016843033509700175, "loss": 0.0035, "step": 4390 },
    { "epoch": 31.43, "learning_rate": 0.01675485008818342, "loss": 0.0043, "step": 4400 },
    { "epoch": 31.5, "learning_rate": 0.016666666666666666, "loss": 0.0033, "step": 4410 },
    { "epoch": 31.57, "learning_rate": 0.016578483245149912, "loss": 0.0032, "step": 4420 },
    { "epoch": 31.64, "learning_rate": 0.016490299823633158, "loss": 0.003, "step": 4430 },
    { "epoch": 31.71, "learning_rate": 0.0164021164021164, "loss": 0.0033, "step": 4440 },
    { "epoch": 31.79, "learning_rate": 0.016313932980599646, "loss": 0.0033, "step": 4450 },
    { "epoch": 31.86, "learning_rate": 0.016225749559082892, "loss": 0.0033, "step": 4460 },
    { "epoch": 31.93, "learning_rate": 0.016137566137566138, "loss": 0.0033, "step": 4470 },
    { "epoch": 32.0, "learning_rate": 0.016049382716049384, "loss": 0.0031, "step": 4480 },
    { "epoch": 32.07, "learning_rate": 0.015961199294532626, "loss": 0.0031, "step": 4490 },
    { "epoch": 32.14, "learning_rate": 0.015873015873015872, "loss": 0.0034, "step": 4500 },
    { "epoch": 32.21, "learning_rate": 0.015784832451499118, "loss": 0.0033, "step": 4510 },
    { "epoch": 32.29, "learning_rate": 0.015696649029982364, "loss": 0.003, "step": 4520 },
    { "epoch": 32.36, "learning_rate": 0.015608465608465608, "loss": 0.0032, "step": 4530 },
    { "epoch": 32.43, "learning_rate": 0.015520282186948854, "loss": 0.0031, "step": 4540 },
    { "epoch": 32.5, "learning_rate": 0.015432098765432098, "loss": 0.0034, "step": 4550 },
    { "epoch": 32.57, "learning_rate": 0.015343915343915344, "loss": 0.0037, "step": 4560 },
    { "epoch": 32.64, "learning_rate": 0.01525573192239859, "loss": 0.0031, "step": 4570 },
    { "epoch": 32.71, "learning_rate": 0.015167548500881834, "loss": 0.003, "step": 4580 },
    { "epoch": 32.79, "learning_rate": 0.01507936507936508, "loss": 0.003, "step": 4590 },
    { "epoch": 32.86, "learning_rate": 0.014991181657848324, "loss": 0.0035, "step": 4600 },
    { "epoch": 32.93, "learning_rate": 0.01490299823633157, "loss": 0.0031, "step": 4610 },
    { "epoch": 33.0, "learning_rate": 0.014814814814814815, "loss": 0.0033, "step": 4620 },
    { "epoch": 33.07, "learning_rate": 0.01472663139329806, "loss": 0.0031, "step": 4630 },
    { "epoch": 33.14, "learning_rate": 0.014638447971781305, "loss": 0.0032, "step": 4640 },
    { "epoch": 33.21, "learning_rate": 0.01455026455026455, "loss": 0.0036, "step": 4650 },
    { "epoch": 33.29, "learning_rate": 0.014462081128747795, "loss": 0.0035, "step": 4660 },
    { "epoch": 33.36, "learning_rate": 0.014373897707231041, "loss": 0.0033, "step": 4670 },
    { "epoch": 33.43, "learning_rate": 0.014285714285714285, "loss": 0.0036, "step": 4680 },
    { "epoch": 33.5, "learning_rate": 0.014197530864197531, "loss": 0.003, "step": 4690 },
    { "epoch": 33.57, "learning_rate": 0.014109347442680775, "loss": 0.0034, "step": 4700 },
    { "epoch": 33.64, "learning_rate": 0.014021164021164021, "loss": 0.0033, "step": 4710 },
    { "epoch": 33.71, "learning_rate": 0.013932980599647267, "loss": 0.0033, "step": 4720 },
    { "epoch": 33.79, "learning_rate": 0.013844797178130511, "loss": 0.0032, "step": 4730 },
    { "epoch": 33.86, "learning_rate": 0.013756613756613757, "loss": 0.0032, "step": 4740 },
    { "epoch": 33.93, "learning_rate": 0.013668430335097001, "loss": 0.0033, "step": 4750 },
    { "epoch": 34.0, "learning_rate": 0.013580246913580247, "loss": 0.0031, "step": 4760 },
    { "epoch": 34.07, "learning_rate": 0.013492063492063493, "loss": 0.0029, "step": 4770 },
    { "epoch": 34.14, "learning_rate": 0.013403880070546737, "loss": 0.003, "step": 4780 },
    { "epoch": 34.21, "learning_rate": 0.013315696649029983, "loss": 0.003, "step": 4790 },
    { "epoch": 34.29, "learning_rate": 0.013227513227513227, "loss": 0.0032, "step": 4800 },
    { "epoch": 34.36, "learning_rate": 0.013139329805996473, "loss": 0.0029, "step": 4810 },
    { "epoch": 34.43, "learning_rate": 0.013051146384479718, "loss": 0.0034, "step": 4820 },
    { "epoch": 34.5, "learning_rate": 0.012962962962962963, "loss": 0.0032, "step": 4830 },
    { "epoch": 34.57, "learning_rate": 0.012874779541446208, "loss": 0.0035, "step": 4840 },
    { "epoch": 34.64, "learning_rate": 0.012786596119929453, "loss": 0.003, "step": 4850 },
    { "epoch": 34.71, "learning_rate": 0.012698412698412698, "loss": 0.0029, "step": 4860 },
    { "epoch": 34.79, "learning_rate": 0.012610229276895944, "loss": 0.0031, "step": 4870 },
    { "epoch": 34.86, "learning_rate": 0.012522045855379188, "loss": 0.0031, "step": 4880 },
    { "epoch": 34.93, "learning_rate": 0.012433862433862434, "loss": 0.0034, "step": 4890 },
    { "epoch": 35.0, "learning_rate": 0.012345679012345678, "loss": 0.0028, "step": 4900 },
    { "epoch": 35.07, "learning_rate": 0.012257495590828924, "loss": 0.0033, "step": 4910 },
    { "epoch": 35.14, "learning_rate": 0.01216931216931217, "loss": 0.0031, "step": 4920 },
    { "epoch": 35.21, "learning_rate": 0.012081128747795414, "loss": 0.0032, "step": 4930 },
    { "epoch": 35.29, "learning_rate": 0.01199294532627866, "loss": 0.0034, "step": 4940 },
    { "epoch": 35.36, "learning_rate": 0.011904761904761904, "loss": 0.0031, "step": 4950 },
    { "epoch": 35.43, "learning_rate": 0.01181657848324515, "loss": 0.003, "step": 4960 },
    { "epoch": 35.5, "learning_rate": 0.011728395061728396, "loss": 0.003, "step": 4970 },
    { "epoch": 35.57, "learning_rate": 0.01164021164021164, "loss": 0.003, "step": 4980 },
    { "epoch": 35.64, "learning_rate": 0.011552028218694886, "loss": 0.003, "step": 4990 },
    { "epoch": 35.71, "learning_rate": 0.01146384479717813, "loss": 0.0033, "step": 5000 },
    { "epoch": 35.79, "learning_rate": 0.011375661375661376, "loss": 0.0032, "step": 5010 },
    { "epoch": 35.86, "learning_rate": 0.011287477954144622, "loss": 0.003, "step": 5020 },
    { "epoch": 35.93, "learning_rate": 0.011199294532627866, "loss": 0.003, "step": 5030 },
    { "epoch": 36.0, "learning_rate": 0.011111111111111112, "loss": 0.0032, "step": 5040 },
    { "epoch": 36.07, "learning_rate": 0.011022927689594356, "loss": 0.0034, "step": 5050 },
    { "epoch": 36.14, "learning_rate": 0.010934744268077601, "loss": 0.0029, "step": 5060 },
    { "epoch": 36.21, "learning_rate": 0.010846560846560847, "loss": 0.0032, "step": 5070 },
    { "epoch": 36.29, "learning_rate": 0.010758377425044091, "loss": 0.0029, "step": 5080 },
    { "epoch": 36.36, "learning_rate": 0.010670194003527337, "loss": 0.0031, "step": 5090 },
    { "epoch": 36.43, "learning_rate": 0.010582010582010581, "loss": 0.0036, "step": 5100 },
    { "epoch": 36.5, "learning_rate": 0.010493827160493827, "loss": 0.003, "step": 5110 },
    { "epoch": 36.57, "learning_rate": 0.010405643738977073, "loss": 0.0031, "step": 5120 },
    { "epoch": 36.64, "learning_rate": 0.010317460317460317, "loss": 0.0031, "step": 5130 },
    { "epoch": 36.71, "learning_rate": 0.010229276895943563, "loss": 0.0035, "step": 5140 },
    { "epoch": 36.79, "learning_rate": 0.010141093474426807, "loss": 0.003, "step": 5150 },
    { "epoch": 36.86, "learning_rate": 0.010052910052910053, "loss": 0.0029, "step": 5160 },
    { "epoch": 36.93, "learning_rate": 0.009964726631393299, "loss": 0.0029, "step": 5170 },
    { "epoch": 37.0, "learning_rate": 0.009876543209876543, "loss": 0.0031, "step": 5180 },
    { "epoch": 37.07, "learning_rate": 0.009788359788359789, "loss": 0.0035, "step": 5190 },
    { "epoch": 37.14, "learning_rate": 0.009700176366843033, "loss": 0.0034, "step": 5200 },
    { "epoch": 37.21, "learning_rate": 0.009611992945326279, "loss": 0.003, "step": 5210 },
    { "epoch": 37.29, "learning_rate": 0.009523809523809525, "loss": 0.0031, "step": 5220 },
    { "epoch": 37.36, "learning_rate": 0.009435626102292769, "loss": 0.003, "step": 5230 },
    { "epoch": 37.43, "learning_rate": 0.009347442680776015, "loss": 0.0032, "step": 5240 },
    { "epoch": 37.5, "learning_rate": 0.009259259259259259, "loss": 0.003, "step": 5250 },
    { "epoch": 37.57, "learning_rate": 0.009171075837742505, "loss": 0.0032, "step": 5260 },
    { "epoch": 37.64, "learning_rate": 0.009082892416225749, "loss": 0.0029, "step": 5270 },
    { "epoch": 37.71, "learning_rate": 0.008994708994708995, "loss": 0.0029, "step": 5280 },
    { "epoch": 37.79, "learning_rate": 0.00890652557319224, "loss": 0.0034, "step": 5290 },
    { "epoch": 37.86, "learning_rate": 0.008818342151675485, "loss": 0.0029, "step": 5300 },
    { "epoch": 37.93, "learning_rate": 0.00873015873015873, "loss": 0.0027, "step": 5310 },
    { "epoch": 38.0, "learning_rate": 0.008641975308641974, "loss": 0.0027, "step": 5320 },
    { "epoch": 38.07, "learning_rate": 0.00855379188712522, "loss": 0.0029, "step": 5330 },
    { "epoch": 38.14, "learning_rate": 0.008465608465608466, "loss": 0.0029, "step": 5340 },
    { "epoch": 38.21, "learning_rate": 0.00837742504409171, "loss": 0.0029, "step": 5350 },
    { "epoch": 38.29, "learning_rate": 0.008289241622574956, "loss": 0.0029, "step": 5360 },
    { "epoch": 38.36, "learning_rate": 0.0082010582010582, "loss": 0.0033, "step": 5370 },
    { "epoch": 38.43, "learning_rate": 0.008112874779541446, "loss": 0.0027, "step": 5380 },
    { "epoch": 38.5, "learning_rate": 0.008024691358024692,
|
"loss": 0.0033, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"learning_rate": 0.007936507936507936, |
|
"loss": 0.0027, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 0.007848324514991182, |
|
"loss": 0.0029, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"learning_rate": 0.007760141093474427, |
|
"loss": 0.0034, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 38.79, |
|
"learning_rate": 0.007671957671957672, |
|
"loss": 0.0032, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 38.86, |
|
"learning_rate": 0.007583774250440917, |
|
"loss": 0.0033, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 0.007495590828924162, |
|
"loss": 0.0031, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 0.007407407407407408, |
|
"loss": 0.0031, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"learning_rate": 0.007319223985890653, |
|
"loss": 0.0033, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 39.14, |
|
"learning_rate": 0.007231040564373898, |
|
"loss": 0.0029, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 39.21, |
|
"learning_rate": 0.007142857142857143, |
|
"loss": 0.0029, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 39.29, |
|
"learning_rate": 0.007054673721340388, |
|
"loss": 0.003, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"learning_rate": 0.0069664902998236335, |
|
"loss": 0.0026, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 39.43, |
|
"learning_rate": 0.0068783068783068784, |
|
"loss": 0.0029, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"learning_rate": 0.006790123456790123, |
|
"loss": 0.0032, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 0.006701940035273368, |
|
"loss": 0.0029, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 39.64, |
|
"learning_rate": 0.006613756613756613, |
|
"loss": 0.003, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 39.71, |
|
"learning_rate": 0.006525573192239859, |
|
"loss": 0.0029, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 39.79, |
|
"learning_rate": 0.006437389770723104, |
|
"loss": 0.003, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 39.86, |
|
"learning_rate": 0.006349206349206349, |
|
"loss": 0.003, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 39.93, |
|
"learning_rate": 0.006261022927689594, |
|
"loss": 0.0028, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.006172839506172839, |
|
"loss": 0.0031, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 40.07, |
|
"learning_rate": 0.006084656084656085, |
|
"loss": 0.003, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 40.14, |
|
"learning_rate": 0.00599647266313933, |
|
"loss": 0.003, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 40.21, |
|
"learning_rate": 0.005908289241622575, |
|
"loss": 0.0028, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 40.29, |
|
"learning_rate": 0.00582010582010582, |
|
"loss": 0.0033, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 40.36, |
|
"learning_rate": 0.005731922398589065, |
|
"loss": 0.003, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 0.005643738977072311, |
|
"loss": 0.0027, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 40.5, |
|
"learning_rate": 0.005555555555555556, |
|
"loss": 0.0031, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 0.005467372134038801, |
|
"loss": 0.0027, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"learning_rate": 0.005379188712522046, |
|
"loss": 0.0031, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 40.71, |
|
"learning_rate": 0.005291005291005291, |
|
"loss": 0.0032, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 40.79, |
|
"learning_rate": 0.0052028218694885366, |
|
"loss": 0.0031, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 0.0051146384479717815, |
|
"loss": 0.0028, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 40.93, |
|
"learning_rate": 0.0050264550264550265, |
|
"loss": 0.0031, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 0.0049382716049382715, |
|
"loss": 0.0029, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 0.0048500881834215165, |
|
"loss": 0.0029, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"learning_rate": 0.004761904761904762, |
|
"loss": 0.0027, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"learning_rate": 0.004673721340388007, |
|
"loss": 0.0026, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"learning_rate": 0.004585537918871252, |
|
"loss": 0.0033, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 41.36, |
|
"learning_rate": 0.004497354497354497, |
|
"loss": 0.003, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"learning_rate": 0.004409171075837742, |
|
"loss": 0.0029, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 41.5, |
|
"learning_rate": 0.004320987654320987, |
|
"loss": 0.0029, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"learning_rate": 0.004232804232804233, |
|
"loss": 0.0029, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 41.64, |
|
"learning_rate": 0.004144620811287478, |
|
"loss": 0.003, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 41.71, |
|
"learning_rate": 0.004056437389770723, |
|
"loss": 0.0029, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 41.79, |
|
"learning_rate": 0.003968253968253968, |
|
"loss": 0.003, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 41.86, |
|
"learning_rate": 0.0038800705467372134, |
|
"loss": 0.0028, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 41.93, |
|
"learning_rate": 0.0037918871252204584, |
|
"loss": 0.0032, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 0.003703703703703704, |
|
"loss": 0.0028, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 42.07, |
|
"learning_rate": 0.003615520282186949, |
|
"loss": 0.0029, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 42.14, |
|
"learning_rate": 0.003527336860670194, |
|
"loss": 0.0028, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 42.21, |
|
"learning_rate": 0.0034391534391534392, |
|
"loss": 0.0028, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 0.003350970017636684, |
|
"loss": 0.0027, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 42.36, |
|
"learning_rate": 0.0032627865961199296, |
|
"loss": 0.003, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 42.43, |
|
"learning_rate": 0.0031746031746031746, |
|
"loss": 0.003, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 0.0030864197530864196, |
|
"loss": 0.0029, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 42.57, |
|
"learning_rate": 0.002998236331569665, |
|
"loss": 0.0027, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"learning_rate": 0.00291005291005291, |
|
"loss": 0.0027, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 42.71, |
|
"learning_rate": 0.0028218694885361554, |
|
"loss": 0.0031, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 42.79, |
|
"learning_rate": 0.0027336860670194004, |
|
"loss": 0.0029, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 0.0026455026455026454, |
|
"loss": 0.0028, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 42.93, |
|
"learning_rate": 0.0025573192239858908, |
|
"loss": 0.0029, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 0.0024691358024691358, |
|
"loss": 0.0028, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 43.07, |
|
"learning_rate": 0.002380952380952381, |
|
"loss": 0.0026, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"learning_rate": 0.002292768959435626, |
|
"loss": 0.0026, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 43.21, |
|
"learning_rate": 0.002204585537918871, |
|
"loss": 0.0028, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 0.0021164021164021165, |
|
"loss": 0.003, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 0.0020282186948853615, |
|
"loss": 0.003, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 43.43, |
|
"learning_rate": 0.0019400352733686067, |
|
"loss": 0.0029, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 43.5, |
|
"learning_rate": 0.001851851851851852, |
|
"loss": 0.0028, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 43.57, |
|
"learning_rate": 0.001763668430335097, |
|
"loss": 0.0028, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 43.64, |
|
"learning_rate": 0.001675485008818342, |
|
"loss": 0.0027, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 43.71, |
|
"learning_rate": 0.0015873015873015873, |
|
"loss": 0.003, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"learning_rate": 0.0014991181657848325, |
|
"loss": 0.0028, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 43.86, |
|
"learning_rate": 0.0014109347442680777, |
|
"loss": 0.003, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 0.0013227513227513227, |
|
"loss": 0.0028, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.0012345679012345679, |
|
"loss": 0.003, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 44.07, |
|
"learning_rate": 0.001146384479717813, |
|
"loss": 0.0028, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 44.14, |
|
"learning_rate": 0.0010582010582010583, |
|
"loss": 0.0028, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 44.21, |
|
"learning_rate": 0.0009700176366843034, |
|
"loss": 0.0032, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"learning_rate": 0.0008818342151675485, |
|
"loss": 0.0027, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 44.36, |
|
"learning_rate": 0.0007936507936507937, |
|
"loss": 0.003, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 0.0007054673721340388, |
|
"loss": 0.0026, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 44.5, |
|
"learning_rate": 0.0006172839506172839, |
|
"loss": 0.0031, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 44.57, |
|
"learning_rate": 0.0005291005291005291, |
|
"loss": 0.0028, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 44.64, |
|
"learning_rate": 0.0004409171075837742, |
|
"loss": 0.003, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 44.71, |
|
"learning_rate": 0.0003527336860670194, |
|
"loss": 0.0026, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 44.79, |
|
"learning_rate": 0.00026455026455026457, |
|
"loss": 0.003, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 0.0001763668430335097, |
|
"loss": 0.0028, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 44.93, |
|
"learning_rate": 8.818342151675486e-05, |
|
"loss": 0.0029, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0026, |
|
"step": 6300 |
|
} |
|
], |
|
"max_steps": 6300, |
|
"num_train_epochs": 45, |
|
"total_flos": 7.685698226686525e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|