|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 4538, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.4052863436123357e-07, |
|
"loss": 8.0371, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.2026431718061677e-06, |
|
"loss": 7.6777, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.4052863436123355e-06, |
|
"loss": 8.1375, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.607929515418502e-06, |
|
"loss": 7.9645, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.810572687224671e-06, |
|
"loss": 7.4338, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1013215859030838e-05, |
|
"loss": 7.9741, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.3215859030837005e-05, |
|
"loss": 8.1339, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5418502202643173e-05, |
|
"loss": 7.8998, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.7621145374449342e-05, |
|
"loss": 7.5467, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9823788546255507e-05, |
|
"loss": 7.024, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.2026431718061676e-05, |
|
"loss": 6.8792, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.422907488986784e-05, |
|
"loss": 6.7453, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.643171806167401e-05, |
|
"loss": 6.1152, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.8634361233480178e-05, |
|
"loss": 6.4661, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.0837004405286347e-05, |
|
"loss": 6.093, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.3039647577092515e-05, |
|
"loss": 5.5373, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5242290748898684e-05, |
|
"loss": 6.0057, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.744493392070485e-05, |
|
"loss": 6.0701, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9647577092511014e-05, |
|
"loss": 5.7831, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.185022026431718e-05, |
|
"loss": 5.8034, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.405286343612335e-05, |
|
"loss": 5.1736, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.625550660792951e-05, |
|
"loss": 5.4872, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.845814977973568e-05, |
|
"loss": 5.2194, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.066079295154186e-05, |
|
"loss": 4.8431, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.286343612334802e-05, |
|
"loss": 4.7448, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.506607929515418e-05, |
|
"loss": 4.9598, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.7268722466960356e-05, |
|
"loss": 5.1425, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.947136563876652e-05, |
|
"loss": 4.9857, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.167400881057269e-05, |
|
"loss": 4.6542, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.387665198237885e-05, |
|
"loss": 4.8413, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.607929515418503e-05, |
|
"loss": 4.7463, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.828193832599119e-05, |
|
"loss": 4.6509, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.048458149779737e-05, |
|
"loss": 4.8795, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.268722466960352e-05, |
|
"loss": 4.4858, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.48898678414097e-05, |
|
"loss": 4.5239, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.709251101321586e-05, |
|
"loss": 4.739, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.929515418502203e-05, |
|
"loss": 4.5952, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.14977973568282e-05, |
|
"loss": 4.4029, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.370044052863437e-05, |
|
"loss": 4.5299, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.590308370044053e-05, |
|
"loss": 4.3894, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.81057268722467e-05, |
|
"loss": 4.588, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.030837004405287e-05, |
|
"loss": 4.2276, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.251101321585903e-05, |
|
"loss": 4.2741, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.471365638766521e-05, |
|
"loss": 3.8694, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.691629955947136e-05, |
|
"loss": 4.3776, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.911894273127755e-05, |
|
"loss": 4.3895, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00010132158590308371, |
|
"loss": 4.3406, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00010352422907488988, |
|
"loss": 4.3947, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00010572687224669604, |
|
"loss": 4.3829, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001079295154185022, |
|
"loss": 4.3668, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011013215859030836, |
|
"loss": 4.111, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011233480176211454, |
|
"loss": 3.8334, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00011453744493392071, |
|
"loss": 4.0465, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011674008810572688, |
|
"loss": 3.6863, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00011894273127753304, |
|
"loss": 4.3514, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012114537444933922, |
|
"loss": 3.9275, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00012334801762114539, |
|
"loss": 4.0648, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00012555066079295154, |
|
"loss": 3.5522, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001277533039647577, |
|
"loss": 3.8612, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001299559471365639, |
|
"loss": 3.7846, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013215859030837006, |
|
"loss": 3.9351, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013436123348017622, |
|
"loss": 3.8342, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00013656387665198237, |
|
"loss": 3.8025, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00013876651982378855, |
|
"loss": 3.7136, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014096916299559473, |
|
"loss": 3.6827, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001431718061674009, |
|
"loss": 3.3543, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00014537444933920705, |
|
"loss": 3.5532, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00014757709251101323, |
|
"loss": 3.3296, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001497797356828194, |
|
"loss": 3.3282, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00015198237885462556, |
|
"loss": 3.7764, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00015418502202643172, |
|
"loss": 3.2611, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00015638766519823787, |
|
"loss": 3.2127, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00015859030837004406, |
|
"loss": 3.1679, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00016079295154185024, |
|
"loss": 3.4743, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001629955947136564, |
|
"loss": 3.3912, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016519823788546255, |
|
"loss": 3.2843, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00016740088105726873, |
|
"loss": 3.6252, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001696035242290749, |
|
"loss": 3.174, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017180616740088107, |
|
"loss": 3.4322, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00017400881057268722, |
|
"loss": 3.3303, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001762114537444934, |
|
"loss": 3.3238, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001784140969162996, |
|
"loss": 2.9926, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018061674008810574, |
|
"loss": 3.1562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001828193832599119, |
|
"loss": 3.0285, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018502202643171805, |
|
"loss": 3.1948, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018722466960352423, |
|
"loss": 3.2688, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018942731277533042, |
|
"loss": 3.2674, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019162995594713657, |
|
"loss": 3.136, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019383259911894273, |
|
"loss": 3.017, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001960352422907489, |
|
"loss": 3.071, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001982378854625551, |
|
"loss": 2.8048, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019999997041318193, |
|
"loss": 2.8113, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001999989348763872, |
|
"loss": 3.0145, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001999964200161957, |
|
"loss": 3.1626, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001999924258698108, |
|
"loss": 2.5559, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019998695249631943, |
|
"loss": 3.0745, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001999799999766913, |
|
"loss": 2.8221, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001999715684137778, |
|
"loss": 2.9143, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019996165793231038, |
|
"loss": 2.8685, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001999502686788986, |
|
"loss": 2.9853, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019993740082202818, |
|
"loss": 2.8111, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019992305455205836, |
|
"loss": 3.3157, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019990723008121917, |
|
"loss": 2.8514, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019988992764360814, |
|
"loss": 2.8086, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000199871147495187, |
|
"loss": 3.0512, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001998508899137779, |
|
"loss": 3.0319, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019982915519905912, |
|
"loss": 2.8135, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019980594367256086, |
|
"loss": 2.8908, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019978125567766023, |
|
"loss": 2.9204, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001997550915795765, |
|
"loss": 3.1027, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019972745176536537, |
|
"loss": 2.969, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019969833664391343, |
|
"loss": 2.8162, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019966774664593206, |
|
"loss": 2.9871, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019963568222395111, |
|
"loss": 2.6507, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019960214385231217, |
|
"loss": 2.508, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001995671320271614, |
|
"loss": 2.8594, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001995306472664425, |
|
"loss": 2.7387, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001994926901098888, |
|
"loss": 2.7499, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019945326111901542, |
|
"loss": 2.4711, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019941236087711077, |
|
"loss": 2.87, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019936998998922826, |
|
"loss": 2.9676, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001993261490821769, |
|
"loss": 2.6321, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001992808388045125, |
|
"loss": 2.6894, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019923405982652767, |
|
"loss": 2.786, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001991858128402422, |
|
"loss": 2.6501, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001991360985593926, |
|
"loss": 2.6592, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001990849177194217, |
|
"loss": 2.4167, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019903227107746774, |
|
"loss": 2.6244, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019897815941235307, |
|
"loss": 2.7636, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019892258352457277, |
|
"loss": 2.7828, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001988655442362827, |
|
"loss": 2.8655, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019880704239128732, |
|
"loss": 2.7669, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019874707885502745, |
|
"loss": 2.5676, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019868565451456704, |
|
"loss": 2.5646, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001986227702785805, |
|
"loss": 2.8244, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001985584270773389, |
|
"loss": 2.6453, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019849262586269642, |
|
"loss": 3.1961, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001984253676080762, |
|
"loss": 2.7, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019835665330845595, |
|
"loss": 2.7882, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019828648398035315, |
|
"loss": 2.7521, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001982148606618102, |
|
"loss": 2.3425, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019814178441237887, |
|
"loss": 2.9472, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019806725631310476, |
|
"loss": 2.6193, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019799127746651117, |
|
"loss": 2.6507, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000197913848996583, |
|
"loss": 2.7298, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019783497204874985, |
|
"loss": 2.7898, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019775464778986934, |
|
"loss": 2.5467, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019767287740820965, |
|
"loss": 2.615, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019758966211343206, |
|
"loss": 2.6105, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000197505003136573, |
|
"loss": 2.6392, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001974189017300259, |
|
"loss": 2.659, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019733135916752252, |
|
"loss": 2.5102, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019724237674411432, |
|
"loss": 2.6638, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001971519557761531, |
|
"loss": 2.858, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019706009760127164, |
|
"loss": 2.6797, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001969668035783639, |
|
"loss": 2.6023, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019687207508756486, |
|
"loss": 2.5694, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019677591353023025, |
|
"loss": 2.8703, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019667832032891554, |
|
"loss": 2.5892, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001965792969273552, |
|
"loss": 2.6138, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019647884479044123, |
|
"loss": 2.7579, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019637696540420136, |
|
"loss": 2.5881, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019627366027577726, |
|
"loss": 2.4141, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019616893093340218, |
|
"loss": 2.8274, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019606277892637823, |
|
"loss": 2.5646, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019595520582505363, |
|
"loss": 2.4943, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019584621322079942, |
|
"loss": 2.3327, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019573580272598583, |
|
"loss": 2.6538, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019562397597395857, |
|
"loss": 2.9938, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001955107346190145, |
|
"loss": 2.5367, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001953960803363774, |
|
"loss": 2.3441, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019528001482217296, |
|
"loss": 2.5851, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001951625397934037, |
|
"loss": 2.4576, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001950436569879238, |
|
"loss": 2.5669, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001949233681644131, |
|
"loss": 2.3772, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019480167510235128, |
|
"loss": 2.5839, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019467857960199142, |
|
"loss": 2.5563, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019455408348433354, |
|
"loss": 2.6866, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019442818859109737, |
|
"loss": 2.5574, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019430089678469542, |
|
"loss": 2.3997, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019417220994820514, |
|
"loss": 2.6884, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019404212998534135, |
|
"loss": 2.5868, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019391065882042786, |
|
"loss": 2.4245, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001937777983983691, |
|
"loss": 2.6269, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019364355068462126, |
|
"loss": 2.4964, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019350791766516344, |
|
"loss": 2.5777, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019337090134646787, |
|
"loss": 2.4597, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019323250375547068, |
|
"loss": 2.5093, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001930927269395416, |
|
"loss": 2.3503, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001929515729664538, |
|
"loss": 2.5458, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019280904392435328, |
|
"loss": 2.6002, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019266514192172794, |
|
"loss": 2.4127, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019251986908737646, |
|
"loss": 2.4922, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019237322757037676, |
|
"loss": 2.4938, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019222521954005424, |
|
"loss": 2.6074, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001920758471859496, |
|
"loss": 2.6341, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019192511271778656, |
|
"loss": 2.4591, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001917730183654392, |
|
"loss": 2.6226, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019161956637889872, |
|
"loss": 2.6092, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019146475902824048, |
|
"loss": 2.4191, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019130859860359026, |
|
"loss": 2.4157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019115108741509036, |
|
"loss": 2.6189, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001909922277928654, |
|
"loss": 2.3266, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019083202208698805, |
|
"loss": 2.36, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019067047266744396, |
|
"loss": 2.4632, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019050758192409707, |
|
"loss": 2.6295, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001903433522666538, |
|
"loss": 2.5695, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001901777861246278, |
|
"loss": 2.4457, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001900108859473039, |
|
"loss": 2.5195, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001898426542037017, |
|
"loss": 2.3216, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001896730933825393, |
|
"loss": 2.6117, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001895022059921963, |
|
"loss": 2.5559, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018932999456067675, |
|
"loss": 2.524, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018915646163557179, |
|
"loss": 2.3177, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018898160978402198, |
|
"loss": 2.4404, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00018880544159267912, |
|
"loss": 2.4747, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018862795966766833, |
|
"loss": 2.6981, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001884491666345492, |
|
"loss": 2.4396, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018826906513827704, |
|
"loss": 2.3892, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018808765784316384, |
|
"loss": 2.1981, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001879049474328387, |
|
"loss": 2.5497, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018772093661020825, |
|
"loss": 2.3756, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018753562809741673, |
|
"loss": 2.4149, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018734902463580542, |
|
"loss": 2.483, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00018716112898587247, |
|
"loss": 2.3027, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018697194392723173, |
|
"loss": 2.3358, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001867814722585719, |
|
"loss": 2.5705, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001865897167976149, |
|
"loss": 2.6456, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018639668038107437, |
|
"loss": 2.4446, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00018620236586461357, |
|
"loss": 2.4226, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001860067761228033, |
|
"loss": 2.3209, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018580991404907912, |
|
"loss": 2.6875, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018561178255569879, |
|
"loss": 2.2906, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018541238457369904, |
|
"loss": 2.1676, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018521172305285236, |
|
"loss": 2.2646, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001850098009616231, |
|
"loss": 2.3756, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018480662128712389, |
|
"loss": 2.4612, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018460218703507118, |
|
"loss": 2.6361, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018439650122974087, |
|
"loss": 2.6342, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018418956691392368, |
|
"loss": 2.2129, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018398138714887993, |
|
"loss": 2.393, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018377196501429438, |
|
"loss": 2.4947, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018356130360823068, |
|
"loss": 2.1074, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001833494060470855, |
|
"loss": 2.1386, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001831362754655424, |
|
"loss": 2.4856, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018292191501652553, |
|
"loss": 2.5449, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018270632787115295, |
|
"loss": 2.3776, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018248951721868973, |
|
"loss": 2.5301, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018227148626650072, |
|
"loss": 2.3679, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018205223824000318, |
|
"loss": 2.3381, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018183177638261895, |
|
"loss": 2.3739, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018161010395572667, |
|
"loss": 2.4117, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018138722423861333, |
|
"loss": 2.3288, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018116314052842584, |
|
"loss": 2.6229, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018093785614012228, |
|
"loss": 2.2574, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018071137440642276, |
|
"loss": 2.4681, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018048369867776029, |
|
"loss": 2.2659, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018025483232223105, |
|
"loss": 2.5531, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001800247787255447, |
|
"loss": 2.323, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00017979354129097407, |
|
"loss": 2.3548, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00017956112343930512, |
|
"loss": 2.331, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017932752860878618, |
|
"loss": 2.4505, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017909276025507696, |
|
"loss": 2.6869, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001788568218511977, |
|
"loss": 2.3611, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017861971688747747, |
|
"loss": 2.4549, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017838144887150298, |
|
"loss": 2.5236, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001781420213280662, |
|
"loss": 2.2486, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00017790143779911256, |
|
"loss": 2.5255, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00017765970184368835, |
|
"loss": 2.339, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00017741681703788826, |
|
"loss": 2.4522, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001771727869748023, |
|
"loss": 2.6055, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017692761526446268, |
|
"loss": 2.317, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017668130553379063, |
|
"loss": 2.1768, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017643386142654236, |
|
"loss": 2.3331, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001761852866032554, |
|
"loss": 2.4262, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001759355847411945, |
|
"loss": 2.3873, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00017568475953429706, |
|
"loss": 2.5427, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00017543281469311857, |
|
"loss": 2.4491, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00017517975394477765, |
|
"loss": 2.5059, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00017492558103290114, |
|
"loss": 2.3825, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00017467029971756837, |
|
"loss": 2.4185, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001744139137752558, |
|
"loss": 2.3802, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00017415642699878108, |
|
"loss": 2.531, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00017389784319724687, |
|
"loss": 2.3635, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017363816619598462, |
|
"loss": 2.2934, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001733773998364979, |
|
"loss": 2.184, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017311554797640552, |
|
"loss": 2.3262, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001728526144893846, |
|
"loss": 2.4294, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00017258860326511318, |
|
"loss": 2.3538, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017232351820921263, |
|
"loss": 2.1007, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017205736324318999, |
|
"loss": 2.2829, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017179014230437988, |
|
"loss": 2.496, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00017152185934588623, |
|
"loss": 2.3997, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017125251833652398, |
|
"loss": 2.2981, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017098212326076008, |
|
"loss": 2.0833, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 2.542, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00017043818692580228, |
|
"loss": 2.2825, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001701646537132716, |
|
"loss": 2.4096, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016989008252754655, |
|
"loss": 2.119, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016961447743046623, |
|
"loss": 2.5423, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016933784249916476, |
|
"loss": 2.5358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00016906018182601096, |
|
"loss": 1.9538, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0001687814995185479, |
|
"loss": 2.3082, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016850179969943207, |
|
"loss": 2.1142, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016822108650637238, |
|
"loss": 2.3758, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016793936409206905, |
|
"loss": 2.4226, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016765663662415204, |
|
"loss": 2.407, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00016737290828511948, |
|
"loss": 2.2467, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00016708818327227574, |
|
"loss": 2.4492, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00016680246579766944, |
|
"loss": 2.4088, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00016651576008803112, |
|
"loss": 2.3473, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00016622807038471052, |
|
"loss": 2.3, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00016593940094361407, |
|
"loss": 2.3139, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00016564975603514183, |
|
"loss": 2.3906, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00016535913994412436, |
|
"loss": 2.0856, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001650675569697593, |
|
"loss": 2.1938, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001647750114255477, |
|
"loss": 2.418, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00016448150763923036, |
|
"loss": 2.2186, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00016418704995272373, |
|
"loss": 2.3271, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00016389164272205566, |
|
"loss": 2.4117, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016359529031730093, |
|
"loss": 2.1018, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016329799712251674, |
|
"loss": 2.199, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016299976753567772, |
|
"loss": 2.2997, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016270060596861087, |
|
"loss": 2.5793, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016240051684693042, |
|
"loss": 2.2919, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00016209950460997226, |
|
"loss": 2.2764, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00016179757371072824, |
|
"loss": 2.4133, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00016149472861578038, |
|
"loss": 2.3608, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001611909738052347, |
|
"loss": 2.2998, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001608863137726551, |
|
"loss": 2.0379, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00016058075302499673, |
|
"loss": 2.391, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00016027429608253934, |
|
"loss": 2.3764, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001599669474788205, |
|
"loss": 2.1648, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00015965871176056837, |
|
"loss": 2.207, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00015934959348763467, |
|
"loss": 2.3042, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00015903959723292705, |
|
"loss": 2.4253, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00015872872758234148, |
|
"loss": 2.4448, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001584169891346944, |
|
"loss": 2.2236, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001581043865016547, |
|
"loss": 2.1879, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015779092430767556, |
|
"loss": 2.3603, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015747660718992598, |
|
"loss": 2.3299, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015716143979822207, |
|
"loss": 2.1691, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00015684542679495847, |
|
"loss": 2.4917, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015652857285503938, |
|
"loss": 2.4864, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015621088266580904, |
|
"loss": 2.3011, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015589236092698286, |
|
"loss": 2.0229, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015557301235057767, |
|
"loss": 2.3556, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015525284166084194, |
|
"loss": 2.3913, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001549318535941861, |
|
"loss": 2.5327, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001546100528991123, |
|
"loss": 1.9177, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00015428744433614415, |
|
"loss": 2.2646, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00015396403267775647, |
|
"loss": 2.2946, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001536398227083046, |
|
"loss": 2.3676, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00015331481922395347, |
|
"loss": 2.272, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00015298902703260692, |
|
"loss": 2.3888, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001526624509538364, |
|
"loss": 2.1347, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00015233509581880973, |
|
"loss": 2.3202, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00015200696647021955, |
|
"loss": 2.4352, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00015167806776221178, |
|
"loss": 2.2928, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00015134840456031384, |
|
"loss": 2.3575, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00015101798174136247, |
|
"loss": 2.1881, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00015068680419343187, |
|
"loss": 2.1145, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001503548768157612, |
|
"loss": 2.1411, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00015002220451868203, |
|
"loss": 2.2795, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00014968879222354597, |
|
"loss": 2.2624, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001493546448626516, |
|
"loss": 2.4409, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001490197673791717, |
|
"loss": 2.4049, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00014868416472707993, |
|
"loss": 2.2198, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00014834784187107785, |
|
"loss": 2.4333, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00014801080378652103, |
|
"loss": 2.2905, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00014767305545934588, |
|
"loss": 2.0903, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00014733460188599563, |
|
"loss": 2.2965, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001469954480733465, |
|
"loss": 2.3256, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00014665559903863365, |
|
"loss": 2.1393, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00014631505980937688, |
|
"loss": 2.4199, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00014597383542330636, |
|
"loss": 2.2842, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00014563193092828803, |
|
"loss": 2.428, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000145289351382249, |
|
"loss": 2.2509, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00014494610185310252, |
|
"loss": 2.4455, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00014460218741867338, |
|
"loss": 2.3054, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00014425761316662241, |
|
"loss": 2.5075, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00014391238419437144, |
|
"loss": 2.2115, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001435665056090278, |
|
"loss": 2.4274, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001432199825273089, |
|
"loss": 2.2174, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00014287282007546627, |
|
"loss": 2.2667, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00014252502338921007, |
|
"loss": 2.2216, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001421765976136328, |
|
"loss": 2.2736, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00014182754790313342, |
|
"loss": 2.4801, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00014147787942134089, |
|
"loss": 2.2007, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00014112759734103808, |
|
"loss": 2.5219, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00014077670684408485, |
|
"loss": 2.2544, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001404252131213418, |
|
"loss": 2.2832, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00014007312137259307, |
|
"loss": 2.2898, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00013972043680646987, |
|
"loss": 2.3556, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.000139367164640373, |
|
"loss": 2.3909, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.000139013310100396, |
|
"loss": 2.389, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00013865887842124755, |
|
"loss": 1.815, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00013830387484617434, |
|
"loss": 2.2869, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001379483046268832, |
|
"loss": 2.2448, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00013759217302346358, |
|
"loss": 2.3517, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00013723548530430974, |
|
"loss": 2.1264, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0001368782467460428, |
|
"loss": 2.3207, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00013652046263343262, |
|
"loss": 2.2458, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00013616213825931963, |
|
"loss": 2.1549, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001358032789245366, |
|
"loss": 2.331, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00013544388993783023, |
|
"loss": 2.2386, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00013508397661578242, |
|
"loss": 2.4434, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00013472354428273193, |
|
"loss": 2.2391, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00013436259827069534, |
|
"loss": 2.1787, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001340011439192884, |
|
"loss": 1.9544, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001336391865756468, |
|
"loss": 2.1705, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00013327673159434724, |
|
"loss": 2.4587, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00013291378433732818, |
|
"loss": 2.2278, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001325503501738105, |
|
"loss": 2.3232, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001321864344802181, |
|
"loss": 2.2155, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001318220426400983, |
|
"loss": 2.2087, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00013145718004404223, |
|
"loss": 2.2386, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001310918520896052, |
|
"loss": 2.3636, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00013072606418122667, |
|
"loss": 1.9883, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00013035982173015034, |
|
"loss": 2.2703, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001299931301543442, |
|
"loss": 2.0428, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001296259948784203, |
|
"loss": 2.2012, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00012925842133355454, |
|
"loss": 2.1794, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001288904149574063, |
|
"loss": 2.1578, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00012852198119403798, |
|
"loss": 2.0021, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001281531254938345, |
|
"loss": 2.2277, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001277838533134226, |
|
"loss": 2.4797, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001274141701155903, |
|
"loss": 2.1755, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00012704408136920585, |
|
"loss": 2.4891, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00012667359254913698, |
|
"loss": 2.2988, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00012630270913616985, |
|
"loss": 2.5418, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00012593143661692808, |
|
"loss": 2.35, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00012555978048379133, |
|
"loss": 2.0868, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00012518774623481433, |
|
"loss": 2.4799, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001248153393736454, |
|
"loss": 2.2664, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000124442565409445, |
|
"loss": 2.2299, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00012406942985680437, |
|
"loss": 2.2555, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00012369593823566383, |
|
"loss": 2.1997, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00012332209607123117, |
|
"loss": 2.2222, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00012294790889389994, |
|
"loss": 2.3091, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001225733822391675, |
|
"loss": 2.0859, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001221985216475533, |
|
"loss": 2.3683, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00012182333266451684, |
|
"loss": 2.1938, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00012144782084037571, |
|
"loss": 2.0238, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00012107199173022327, |
|
"loss": 2.345, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00012069585089384674, |
|
"loss": 2.1972, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00012031940389564478, |
|
"loss": 2.1552, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00011994265630454524, |
|
"loss": 2.2012, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00011956561369392274, |
|
"loss": 2.265, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001191882816415162, |
|
"loss": 2.2672, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00011881066572934644, |
|
"loss": 2.2446, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00011843277154363353, |
|
"loss": 2.2362, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001180546046747141, |
|
"loss": 2.1197, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00011767617071695867, |
|
"loss": 1.9836, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000117297475268689, |
|
"loss": 2.1665, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00011691852393209508, |
|
"loss": 2.4497, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00011653932231315245, |
|
"loss": 2.0536, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001161598760215391, |
|
"loss": 2.3549, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001157801906705526, |
|
"loss": 2.1831, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00011540027187702698, |
|
"loss": 2.1216, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00011502012526124978, |
|
"loss": 2.2128, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011463975644687867, |
|
"loss": 2.1899, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011425917106085844, |
|
"loss": 2.1396, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011387837473333774, |
|
"loss": 2.5057, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011349737309758572, |
|
"loss": 2.0298, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011311617178990879, |
|
"loss": 2.2519, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001127347764495671, |
|
"loss": 2.1852, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00011235319271869129, |
|
"loss": 2.2552, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00011197142624219887, |
|
"loss": 2.1774, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00011158948266771086, |
|
"loss": 2.2011, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00011120736764546799, |
|
"loss": 2.2846, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00011082508682824748, |
|
"loss": 2.1659, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001104426458712791, |
|
"loss": 2.1766, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00011006005043216172, |
|
"loss": 1.9432, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00010967730617077938, |
|
"loss": 2.1685, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00010929441874921789, |
|
"loss": 2.3591, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00010891139383168072, |
|
"loss": 2.3673, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00010852823708440551, |
|
"loss": 2.3499, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00010814495417557997, |
|
"loss": 2.2054, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00010776155077525827, |
|
"loss": 2.2129, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00010737803255527702, |
|
"loss": 2.1971, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00010699440518917145, |
|
"loss": 2.1717, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00010661067435209135, |
|
"loss": 1.9133, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00010622684572071724, |
|
"loss": 2.1657, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00010584292497317633, |
|
"loss": 2.2476, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00010545891778895862, |
|
"loss": 2.1151, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00010507482984883268, |
|
"loss": 1.9689, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00010469066683476181, |
|
"loss": 2.1355, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00010430643442981986, |
|
"loss": 2.0989, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00010392213831810733, |
|
"loss": 2.113, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00010353778418466697, |
|
"loss": 2.1521, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001031533777154, |
|
"loss": 2.1597, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00010276892459698182, |
|
"loss": 2.1667, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00010238443051677798, |
|
"loss": 1.9723, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00010199990116275988, |
|
"loss": 2.1176, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00010161534222342083, |
|
"loss": 2.1394, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00010123075938769187, |
|
"loss": 2.1762, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00010084615834485733, |
|
"loss": 2.1752, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00010046154478447114, |
|
"loss": 2.1837, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00010007692439627224, |
|
"loss": 2.2389, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.96923028701006e-05, |
|
"loss": 2.1724, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.93076858958131e-05, |
|
"loss": 2.2438, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.892307916319919e-05, |
|
"loss": 2.4101, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.853848836189691e-05, |
|
"loss": 2.134, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.815391918130848e-05, |
|
"loss": 2.2203, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.776937731051641e-05, |
|
"loss": 2.2268, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.738486843819919e-05, |
|
"loss": 2.4168, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.700039825254703e-05, |
|
"loss": 2.2746, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.661597244117802e-05, |
|
"loss": 2.216, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.623159669105364e-05, |
|
"loss": 2.1087, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.584727668839487e-05, |
|
"loss": 2.2089, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.5463018118598e-05, |
|
"loss": 2.2306, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.507882666615049e-05, |
|
"loss": 2.2674, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.469470801454688e-05, |
|
"loss": 2.1774, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.431066784620486e-05, |
|
"loss": 1.9788, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.392671184238092e-05, |
|
"loss": 2.1459, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.354284568308665e-05, |
|
"loss": 1.9707, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.31590750470044e-05, |
|
"loss": 2.1999, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.277540561140342e-05, |
|
"loss": 2.2249, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.239184305205594e-05, |
|
"loss": 2.0383, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.2008393043153e-05, |
|
"loss": 2.0296, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.162506125722082e-05, |
|
"loss": 2.4062, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.124185336503656e-05, |
|
"loss": 2.253, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.085877503554455e-05, |
|
"loss": 2.0702, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.04758319357726e-05, |
|
"loss": 2.1399, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.009302973074783e-05, |
|
"loss": 2.2487, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.971037408341319e-05, |
|
"loss": 2.0532, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.932787065454352e-05, |
|
"loss": 2.2577, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.894552510266172e-05, |
|
"loss": 2.3149, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.856334308395536e-05, |
|
"loss": 1.9829, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.818133025219258e-05, |
|
"loss": 2.1338, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.77994922586388e-05, |
|
"loss": 2.1417, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.741783475197301e-05, |
|
"loss": 2.1236, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.703636337820407e-05, |
|
"loss": 1.9999, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.665508378058737e-05, |
|
"loss": 2.5837, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.627400159954136e-05, |
|
"loss": 2.1618, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.589312247256385e-05, |
|
"loss": 2.2525, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.551245203414901e-05, |
|
"loss": 2.0536, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.51319959157036e-05, |
|
"loss": 2.0885, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.475175974546394e-05, |
|
"loss": 1.9987, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.437174914841261e-05, |
|
"loss": 2.0031, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.399196974619516e-05, |
|
"loss": 2.2783, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.36124271570369e-05, |
|
"loss": 2.1375, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.323312699566008e-05, |
|
"loss": 2.1599, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.285407487320042e-05, |
|
"loss": 2.274, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.24752763971244e-05, |
|
"loss": 2.0749, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.209673717114618e-05, |
|
"loss": 2.3841, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.171846279514476e-05, |
|
"loss": 2.1772, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.134045886508108e-05, |
|
"loss": 2.008, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.096273097291526e-05, |
|
"loss": 2.2219, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.058528470652396e-05, |
|
"loss": 2.1922, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.020812564961754e-05, |
|
"loss": 2.1488, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.983125938165758e-05, |
|
"loss": 2.2547, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.94546914777744e-05, |
|
"loss": 2.0456, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.907842750868441e-05, |
|
"loss": 2.0717, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.870247304060776e-05, |
|
"loss": 2.1157, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.832683363518621e-05, |
|
"loss": 2.2051, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.795151484940047e-05, |
|
"loss": 2.2932, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.757652223548836e-05, |
|
"loss": 2.2374, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.720186134086241e-05, |
|
"loss": 1.9577, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.682753770802791e-05, |
|
"loss": 2.2365, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.645355687450095e-05, |
|
"loss": 2.2679, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.607992437272642e-05, |
|
"loss": 2.1363, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.570664572999616e-05, |
|
"loss": 2.2174, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.533372646836736e-05, |
|
"loss": 2.3122, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.496117210458063e-05, |
|
"loss": 2.3869, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.458898814997852e-05, |
|
"loss": 2.0335, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.421718011042406e-05, |
|
"loss": 1.883, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.384575348621909e-05, |
|
"loss": 2.2373, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.347471377202314e-05, |
|
"loss": 2.3576, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.31040664567719e-05, |
|
"loss": 2.2499, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.273381702359632e-05, |
|
"loss": 2.2617, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.236397094974119e-05, |
|
"loss": 2.1201, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.199453370648427e-05, |
|
"loss": 2.0605, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.162551075905538e-05, |
|
"loss": 2.0576, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.125690756655545e-05, |
|
"loss": 2.0743, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.088872958187578e-05, |
|
"loss": 2.2857, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.052098225161756e-05, |
|
"loss": 1.9972, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.015367101601091e-05, |
|
"loss": 2.0777, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.978680130883488e-05, |
|
"loss": 2.3187, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.942037855733661e-05, |
|
"loss": 2.165, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.905440818215133e-05, |
|
"loss": 2.0252, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.868889559722213e-05, |
|
"loss": 2.1558, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.832384620971977e-05, |
|
"loss": 2.2264, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.795926541996273e-05, |
|
"loss": 2.1026, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.759515862133745e-05, |
|
"loss": 1.9323, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.723153120021833e-05, |
|
"loss": 2.2133, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.686838853588815e-05, |
|
"loss": 2.2056, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.65057360004586e-05, |
|
"loss": 2.2081, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.614357895879057e-05, |
|
"loss": 2.2055, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.578192276841501e-05, |
|
"loss": 2.2554, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.542077277945353e-05, |
|
"loss": 2.0585, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.506013433453926e-05, |
|
"loss": 2.3229, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.470001276873803e-05, |
|
"loss": 2.1304, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.434041340946909e-05, |
|
"loss": 2.1815, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.398134157642659e-05, |
|
"loss": 1.9168, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.362280258150074e-05, |
|
"loss": 2.332, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.326480172869921e-05, |
|
"loss": 2.2381, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.29073443140689e-05, |
|
"loss": 2.2526, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.255043562561719e-05, |
|
"loss": 2.2872, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.219408094323415e-05, |
|
"loss": 2.364, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.183828553861409e-05, |
|
"loss": 1.9686, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.148305467517768e-05, |
|
"loss": 2.3009, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.112839360799422e-05, |
|
"loss": 2.1697, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.077430758370376e-05, |
|
"loss": 2.1991, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.042080184043941e-05, |
|
"loss": 2.223, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.0067881607750134e-05, |
|
"loss": 2.0575, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.971555210652304e-05, |
|
"loss": 2.2195, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.936381854890646e-05, |
|
"loss": 2.1538, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.9012686138232585e-05, |
|
"loss": 2.2694, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.8662160068940655e-05, |
|
"loss": 2.2146, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.831224552650003e-05, |
|
"loss": 2.0831, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.796294768733362e-05, |
|
"loss": 2.3443, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.761427171874103e-05, |
|
"loss": 2.2026, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.726622277882243e-05, |
|
"loss": 2.3855, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.691880601640204e-05, |
|
"loss": 2.1517, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.657202657095206e-05, |
|
"loss": 1.9031, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.622588957251665e-05, |
|
"loss": 2.0287, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.588040014163585e-05, |
|
"loss": 2.0336, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.5535563389270196e-05, |
|
"loss": 2.3351, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.519138441672471e-05, |
|
"loss": 2.2632, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.484786831557369e-05, |
|
"loss": 2.2373, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.4505020167585396e-05, |
|
"loss": 2.3092, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.416284504464657e-05, |
|
"loss": 1.9927, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.3821348008687967e-05, |
|
"loss": 2.2656, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.3480534111608805e-05, |
|
"loss": 2.1348, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.314040839520253e-05, |
|
"loss": 2.2229, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.2800975891082126e-05, |
|
"loss": 2.1983, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.2462241620605366e-05, |
|
"loss": 2.1517, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.21242105948011e-05, |
|
"loss": 2.4824, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.178688781429455e-05, |
|
"loss": 2.0474, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.145027826923371e-05, |
|
"loss": 2.3389, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.111438693921536e-05, |
|
"loss": 2.1822, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.0779218793211524e-05, |
|
"loss": 2.0217, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.044477878949571e-05, |
|
"loss": 2.3117, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.011107187556989e-05, |
|
"loss": 1.9003, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.97781029880911e-05, |
|
"loss": 2.1841, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.944587705279846e-05, |
|
"loss": 2.2673, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.911439898444036e-05, |
|
"loss": 1.9417, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.8783673686701514e-05, |
|
"loss": 2.0593, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.845370605213091e-05, |
|
"loss": 2.3553, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.812450096206884e-05, |
|
"loss": 2.3565, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.779606328657513e-05, |
|
"loss": 2.1688, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.7468397884356966e-05, |
|
"loss": 2.2295, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.71415096026968e-05, |
|
"loss": 2.3155, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.681540327738113e-05, |
|
"loss": 2.0892, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.64900837326284e-05, |
|
"loss": 2.245, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.6165555781018046e-05, |
|
"loss": 2.3306, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.584182422341915e-05, |
|
"loss": 2.2678, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.5518893848919286e-05, |
|
"loss": 1.9734, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.519676943475408e-05, |
|
"loss": 2.0935, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.4875455746236004e-05, |
|
"loss": 2.1692, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.455495753668428e-05, |
|
"loss": 2.2828, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.4235279547354414e-05, |
|
"loss": 2.2103, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.391642650736811e-05, |
|
"loss": 2.3463, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.3598403133643165e-05, |
|
"loss": 2.1223, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.328121413082388e-05, |
|
"loss": 1.9965, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.296486419121137e-05, |
|
"loss": 2.1909, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.264935799469417e-05, |
|
"loss": 2.2577, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.233470020867899e-05, |
|
"loss": 2.1195, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.202089548802157e-05, |
|
"loss": 1.8477, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.170794847495814e-05, |
|
"loss": 2.0003, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.139586379903629e-05, |
|
"loss": 2.0966, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.1084646077046864e-05, |
|
"loss": 1.9883, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.077429991295549e-05, |
|
"loss": 2.0205, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.046482989783441e-05, |
|
"loss": 2.17, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.015624060979486e-05, |
|
"loss": 2.114, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.984853661391888e-05, |
|
"loss": 2.0522, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.9541722462192196e-05, |
|
"loss": 2.2381, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.923580269343673e-05, |
|
"loss": 2.0224, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.893078183324329e-05, |
|
"loss": 2.3678, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.8626664393905024e-05, |
|
"loss": 1.8977, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.832345487435019e-05, |
|
"loss": 2.1492, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.8021157760075945e-05, |
|
"loss": 2.0719, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.7719777523081864e-05, |
|
"loss": 2.1618, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.741931862180384e-05, |
|
"loss": 2.1497, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.7119785501047977e-05, |
|
"loss": 1.8983, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.682118259192502e-05, |
|
"loss": 2.2605, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.652351431178473e-05, |
|
"loss": 2.1666, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.6226785064150524e-05, |
|
"loss": 2.2626, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.593099923865438e-05, |
|
"loss": 2.0513, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.5636161210971705e-05, |
|
"loss": 2.1085, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.5342275342757046e-05, |
|
"loss": 1.9572, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.5049345981578994e-05, |
|
"loss": 2.1665, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.475737746085631e-05, |
|
"loss": 2.2385, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.4466374099793696e-05, |
|
"loss": 2.1853, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.417634020331769e-05, |
|
"loss": 2.0076, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.3887280062013425e-05, |
|
"loss": 2.1504, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.359919795206065e-05, |
|
"loss": 1.9365, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.331209813517082e-05, |
|
"loss": 2.2387, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.302598485852401e-05, |
|
"loss": 1.9987, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.2740862354705834e-05, |
|
"loss": 2.0303, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.245673484164521e-05, |
|
"loss": 2.2559, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.2173606522551636e-05, |
|
"loss": 2.0267, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.1891481585853224e-05, |
|
"loss": 2.0388, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.1610364205134615e-05, |
|
"loss": 1.9447, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.133025853907531e-05, |
|
"loss": 2.2349, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.105116873138806e-05, |
|
"loss": 2.0192, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.077309891075766e-05, |
|
"loss": 2.1557, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.0496053190779872e-05, |
|
"loss": 2.2377, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.0220035669900493e-05, |
|
"loss": 2.2612, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.9945050431354814e-05, |
|
"loss": 2.0673, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.9671101543107037e-05, |
|
"loss": 1.9976, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.9398193057790456e-05, |
|
"loss": 2.1429, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.9126329012647048e-05, |
|
"loss": 2.1853, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.8855513429468107e-05, |
|
"loss": 2.2061, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.8585750314534633e-05, |
|
"loss": 2.4006, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.8317043658557906e-05, |
|
"loss": 2.084, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.8049397436620817e-05, |
|
"loss": 2.0911, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.778281560811864e-05, |
|
"loss": 2.0431, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.751730211670075e-05, |
|
"loss": 2.2465, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.725286089021223e-05, |
|
"loss": 2.1271, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6989495840635615e-05, |
|
"loss": 2.152, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6727210864033235e-05, |
|
"loss": 2.2119, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6466009840489436e-05, |
|
"loss": 1.9971, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.620589663405323e-05, |
|
"loss": 1.9645, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.5946875092681134e-05, |
|
"loss": 2.0142, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.5688949048180245e-05, |
|
"loss": 2.1484, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.5432122316151463e-05, |
|
"loss": 2.2044, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.5176398695933233e-05, |
|
"loss": 2.3567, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4921781970545178e-05, |
|
"loss": 2.0796, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4668275906632232e-05, |
|
"loss": 2.0059, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.441588425440886e-05, |
|
"loss": 2.2845, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.4164610747603524e-05, |
|
"loss": 2.1271, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3914459103403696e-05, |
|
"loss": 2.2532, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3665433022400517e-05, |
|
"loss": 2.1088, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3417536188534327e-05, |
|
"loss": 2.0666, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3170772269040075e-05, |
|
"loss": 1.994, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.292514491439297e-05, |
|
"loss": 2.1057, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.268065775825473e-05, |
|
"loss": 2.2502, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.2437314417419518e-05, |
|
"loss": 2.0369, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.2195118491760668e-05, |
|
"loss": 2.13, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.195407356417737e-05, |
|
"loss": 2.1939, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.171418320054155e-05, |
|
"loss": 2.3154, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.1475450949645325e-05, |
|
"loss": 2.0751, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.1237880343148352e-05, |
|
"loss": 2.0632, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.100147489552562e-05, |
|
"loss": 2.2255, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.076623810401549e-05, |
|
"loss": 2.2219, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0532173448567936e-05, |
|
"loss": 2.2883, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0299284391793006e-05, |
|
"loss": 2.0587, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.0067574378909726e-05, |
|
"loss": 2.1494, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.9837046837695072e-05, |
|
"loss": 2.3035, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.9607705178433124e-05, |
|
"loss": 2.0228, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.9379552793864962e-05, |
|
"loss": 1.9292, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.9152593059138036e-05, |
|
"loss": 2.1265, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.892682933175667e-05, |
|
"loss": 1.9264, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.870226495153199e-05, |
|
"loss": 2.3023, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.847890324053283e-05, |
|
"loss": 2.213, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.8256747503036465e-05, |
|
"loss": 2.0093, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.8035801025479615e-05, |
|
"loss": 2.1466, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.7816067076410138e-05, |
|
"loss": 2.2077, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.759754890643829e-05, |
|
"loss": 2.1645, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.738024974818896e-05, |
|
"loss": 2.1975, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.71641728162537e-05, |
|
"loss": 2.0768, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.6949321307143096e-05, |
|
"loss": 2.1729, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.6735698399239687e-05, |
|
"loss": 2.1934, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.6523307252750787e-05, |
|
"loss": 2.0382, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.6312151009661792e-05, |
|
"loss": 2.1809, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.6102232793689652e-05, |
|
"loss": 2.2001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.5893555710236784e-05, |
|
"loss": 2.2799, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.5686122846344932e-05, |
|
"loss": 2.2641, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.547993727064968e-05, |
|
"loss": 2.126, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.5275002033335016e-05, |
|
"loss": 2.1068, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.5071320166088066e-05, |
|
"loss": 2.0277, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.4868894682054535e-05, |
|
"loss": 2.1387, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.4667728575793772e-05, |
|
"loss": 2.3061, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.4467824823234843e-05, |
|
"loss": 2.0555, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.426918638163216e-05, |
|
"loss": 2.206, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.407181618952199e-05, |
|
"loss": 2.0053, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3875717166678892e-05, |
|
"loss": 2.0465, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3680892214072405e-05, |
|
"loss": 2.0906, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3487344213824437e-05, |
|
"loss": 2.049, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3295076029166265e-05, |
|
"loss": 2.0334, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.310409050439645e-05, |
|
"loss": 2.2541, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2914390464838655e-05, |
|
"loss": 2.339, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2725978716799791e-05, |
|
"loss": 2.1708, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2538858047528646e-05, |
|
"loss": 2.1017, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.235303122517455e-05, |
|
"loss": 2.1563, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2168500998746435e-05, |
|
"loss": 1.9978, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.1985270098072188e-05, |
|
"loss": 2.0827, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.1803341233758291e-05, |
|
"loss": 2.2082, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.1622717097149627e-05, |
|
"loss": 2.08, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.144340036028978e-05, |
|
"loss": 2.0822, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.1265393675881487e-05, |
|
"loss": 2.1352, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.1088699677247238e-05, |
|
"loss": 2.0985, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.0913320978290642e-05, |
|
"loss": 2.011, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.0739260173457355e-05, |
|
"loss": 1.9461, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.056651983769712e-05, |
|
"loss": 2.0538, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.0395102526425282e-05, |
|
"loss": 1.8053, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.0225010775485277e-05, |
|
"loss": 2.1013, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.0056247101110972e-05, |
|
"loss": 2.0568, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.888813999889446e-06, |
|
"loss": 2.1572, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.72271394872416e-06, |
|
"loss": 2.263, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.557949404798183e-06, |
|
"loss": 2.0593, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.394522805537931e-06, |
|
"loss": 2.0521, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.232436568577085e-06, |
|
"loss": 2.2315, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.0716930917208e-06, |
|
"loss": 2.0703, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.912294752910254e-06, |
|
"loss": 2.4333, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.754243910187498e-06, |
|
"loss": 2.213, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.597542901660538e-06, |
|
"loss": 1.992, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.442194045468733e-06, |
|
"loss": 2.0252, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.28819963974855e-06, |
|
"loss": 1.9801, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.135561962599514e-06, |
|
"loss": 2.1347, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.984283272050553e-06, |
|
"loss": 2.1591, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.834365806026578e-06, |
|
"loss": 1.9528, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.68581178231531e-06, |
|
"loss": 1.9756, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.538623398534661e-06, |
|
"loss": 2.093, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.392802832099965e-06, |
|
"loss": 1.9631, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.248352240192002e-06, |
|
"loss": 2.135, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.105273759724884e-06, |
|
"loss": 2.1906, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.963569507314627e-06, |
|
"loss": 2.3328, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.82324157924773e-06, |
|
"loss": 2.2979, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.684292051450147e-06, |
|
"loss": 1.9134, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.546722979456643e-06, |
|
"loss": 2.1358, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.410536398380385e-06, |
|
"loss": 2.2415, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.275734322882776e-06, |
|
"loss": 2.333, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.142318747143716e-06, |
|
"loss": 2.001, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.010291644832044e-06, |
|
"loss": 2.1209, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.8796549690763645e-06, |
|
"loss": 1.9222, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.750410652436178e-06, |
|
"loss": 2.1773, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.622560606873262e-06, |
|
"loss": 2.2215, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.496106723723416e-06, |
|
"loss": 1.8445, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.371050873668437e-06, |
|
"loss": 2.3022, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.247394906708469e-06, |
|
"loss": 1.9745, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5.125140652134652e-06, |
|
"loss": 2.0829, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5.004289918502047e-06, |
|
"loss": 1.9314, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.884844493602847e-06, |
|
"loss": 2.0076, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.766806144440039e-06, |
|
"loss": 2.1293, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.650176617201074e-06, |
|
"loss": 2.0244, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.5349576372322845e-06, |
|
"loss": 1.9962, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.421150909013094e-06, |
|
"loss": 2.1399, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.308758116131029e-06, |
|
"loss": 1.8508, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.197780921256678e-06, |
|
"loss": 1.9079, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.088220966119105e-06, |
|
"loss": 2.0411, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.9800798714816566e-06, |
|
"loss": 2.1043, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.873359237117868e-06, |
|
"loss": 2.1293, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.768060641787874e-06, |
|
"loss": 2.07, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.6641856432150232e-06, |
|
"loss": 2.0227, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.561735778062847e-06, |
|
"loss": 2.1693, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.4607125619123048e-06, |
|
"loss": 1.9553, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.3611174892393848e-06, |
|
"loss": 2.1492, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.2629520333930075e-06, |
|
"loss": 2.3135, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.1662176465731776e-06, |
|
"loss": 2.1186, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.070915759809567e-06, |
|
"loss": 2.2169, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.97704778294029e-06, |
|
"loss": 1.9161, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.884615104591093e-06, |
|
"loss": 2.0842, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.793619092154787e-06, |
|
"loss": 2.0509, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.7040610917709597e-06, |
|
"loss": 2.0457, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.6159424283062507e-06, |
|
"loss": 2.0591, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.529264405334486e-06, |
|
"loss": 2.0041, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.4440283051176405e-06, |
|
"loss": 2.227, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.3602353885866956e-06, |
|
"loss": 1.8759, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.277886895323078e-06, |
|
"loss": 2.0249, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.1969840435402953e-06, |
|
"loss": 2.1304, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.117528030065907e-06, |
|
"loss": 2.1629, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.039520030323816e-06, |
|
"loss": 2.1015, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.962961198316937e-06, |
|
"loss": 2.0901, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8878526666100128e-06, |
|
"loss": 2.2332, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8141955463129912e-06, |
|
"loss": 2.1336, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.7419909270644853e-06, |
|
"loss": 2.0399, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.6712398770156734e-06, |
|
"loss": 1.9857, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.6019434428145686e-06, |
|
"loss": 2.0271, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.5341026495904409e-06, |
|
"loss": 2.1896, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4677185009387085e-06, |
|
"loss": 2.2911, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.4027919789060818e-06, |
|
"loss": 2.082, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.339324043976009e-06, |
|
"loss": 1.829, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.27731563505451e-06, |
|
"loss": 2.3073, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.2167676694562535e-06, |
|
"loss": 2.22, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1576810428910012e-06, |
|
"loss": 2.1452, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1000566294503523e-06, |
|
"loss": 1.9852, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.04389528159482e-06, |
|
"loss": 2.1192, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.891978301412308e-07, |
|
"loss": 2.1154, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.359650842503565e-07, |
|
"loss": 1.9588, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.841978314150901e-07, |
|
"loss": 2.2275, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.338968374486555e-07, |
|
"loss": 2.1793, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.850628464733501e-07, |
|
"loss": 2.1219, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.376965809095193e-07, |
|
"loss": 2.1371, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.917987414648885e-07, |
|
"loss": 1.8774, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.473700071241484e-07, |
|
"loss": 2.173, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.044110351389743e-07, |
|
"loss": 2.0139, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.629224610182671e-07, |
|
"loss": 2.012, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.229048985187279e-07, |
|
"loss": 2.0772, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.843589396358427e-07, |
|
"loss": 1.9894, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.472851545950563e-07, |
|
"loss": 2.0555, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.116840918434006e-07, |
|
"loss": 1.9658, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.7755627804134664e-07, |
|
"loss": 2.1685, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.449022180549766e-07, |
|
"loss": 2.1805, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.1372239494860123e-07, |
|
"loss": 2.0124, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.840172699775656e-07, |
|
"loss": 2.0179, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.5578728258138783e-07, |
|
"loss": 2.1432, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.290328503773309e-07, |
|
"loss": 2.1226, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0375436915420764e-07, |
|
"loss": 2.0209, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7995221286645215e-07, |
|
"loss": 2.1942, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5762673362871293e-07, |
|
"loss": 2.3072, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.367782617105351e-07, |
|
"loss": 2.15, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.1740710553153067e-07, |
|
"loss": 2.0169, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.951355165678244e-08, |
|
"loss": 2.1183, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.309786479264726e-08, |
|
"loss": 2.1434, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.816028778281469e-08, |
|
"loss": 2.0947, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.4701041604710014e-08, |
|
"loss": 2.0434, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.272032536621895e-08, |
|
"loss": 2.0497, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.221831630277894e-08, |
|
"loss": 2.207, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.3195169774714586e-08, |
|
"loss": 2.1715, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.565101926497281e-08, |
|
"loss": 2.2098, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.585976377124439e-09, |
|
"loss": 2.1455, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.0001308337210835e-09, |
|
"loss": 2.1535, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8935504749628684e-09, |
|
"loss": 1.9348, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.6628125773253686e-10, |
|
"loss": 2.1515, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 4538, |
|
"total_flos": 1.080061286023168e+16, |
|
"train_loss": 2.5026560972944236, |
|
"train_runtime": 41380.2373, |
|
"train_samples_per_second": 0.439, |
|
"train_steps_per_second": 0.11 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4538, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 1.080061286023168e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|