|
{ |
|
"best_metric": 0.60627281665802, |
|
"best_model_checkpoint": "lora-alpaca-trading-candles/checkpoint-18600", |
|
"epoch": 2.983084826462849, |
|
"global_step": 18600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.1224, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.5943, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.7997, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.6494, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0003, |
|
"loss": 0.6283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002996775060467616, |
|
"loss": 0.6055, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029935501209352324, |
|
"loss": 0.6061, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002990325181402848, |
|
"loss": 0.6085, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002987100241870465, |
|
"loss": 0.6014, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002983875302338081, |
|
"loss": 0.6009, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 0.6189413070678711, |
|
"eval_runtime": 81.8892, |
|
"eval_samples_per_second": 24.423, |
|
"eval_steps_per_second": 3.053, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002980650362805697, |
|
"loss": 0.6046, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029774254232733135, |
|
"loss": 0.6035, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029742004837409293, |
|
"loss": 0.6036, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029709755442085457, |
|
"loss": 0.6074, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002967750604676162, |
|
"loss": 0.6004, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029645256651437784, |
|
"loss": 0.605, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029613007256113947, |
|
"loss": 0.595, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029580757860790105, |
|
"loss": 0.5994, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002954850846546627, |
|
"loss": 0.5957, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002951625907014243, |
|
"loss": 0.6023, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 0.6173219680786133, |
|
"eval_runtime": 82.3581, |
|
"eval_samples_per_second": 24.284, |
|
"eval_steps_per_second": 3.036, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029484009674818595, |
|
"loss": 0.5934, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002945176027949476, |
|
"loss": 0.5951, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002941951088417092, |
|
"loss": 0.5996, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002938726148884708, |
|
"loss": 0.5942, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029355012093523243, |
|
"loss": 0.5938, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029322762698199406, |
|
"loss": 0.5964, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002929051330287557, |
|
"loss": 0.5925, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029258263907551733, |
|
"loss": 0.5996, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002922601451222789, |
|
"loss": 0.5976, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029193765116904054, |
|
"loss": 0.6009, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.6116005778312683, |
|
"eval_runtime": 82.3123, |
|
"eval_samples_per_second": 24.298, |
|
"eval_steps_per_second": 3.037, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002916151572158022, |
|
"loss": 0.5986, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002912926632625638, |
|
"loss": 0.5891, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029097016930932544, |
|
"loss": 0.5983, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000290647675356087, |
|
"loss": 0.5919, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029032518140284866, |
|
"loss": 0.5969, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002900026874496103, |
|
"loss": 0.5971, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002896801934963719, |
|
"loss": 0.5915, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00028935769954313356, |
|
"loss": 0.5916, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028903520558989514, |
|
"loss": 0.5945, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002887127116366568, |
|
"loss": 0.5982, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 0.6110595464706421, |
|
"eval_runtime": 82.5778, |
|
"eval_samples_per_second": 24.22, |
|
"eval_steps_per_second": 3.027, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002883902176834184, |
|
"loss": 0.601, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028806772373018004, |
|
"loss": 0.5915, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028774522977694167, |
|
"loss": 0.5946, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028742273582370325, |
|
"loss": 0.5971, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028710024187046494, |
|
"loss": 0.5961, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002867777479172265, |
|
"loss": 0.6021, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028645525396398815, |
|
"loss": 0.5983, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002861327600107498, |
|
"loss": 0.5955, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002858102660575114, |
|
"loss": 0.5899, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000285487772104273, |
|
"loss": 0.5956, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.6105289459228516, |
|
"eval_runtime": 82.6465, |
|
"eval_samples_per_second": 24.199, |
|
"eval_steps_per_second": 3.025, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028516527815103463, |
|
"loss": 0.5972, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028484278419779627, |
|
"loss": 0.5977, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002845202902445579, |
|
"loss": 0.5988, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028419779629131953, |
|
"loss": 0.592, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002838753023380811, |
|
"loss": 0.5935, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028355280838484275, |
|
"loss": 0.5993, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002832303144316044, |
|
"loss": 0.5997, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000282907820478366, |
|
"loss": 0.588, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028258532652512765, |
|
"loss": 0.5949, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028226283257188923, |
|
"loss": 0.5915, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.6102217435836792, |
|
"eval_runtime": 82.7137, |
|
"eval_samples_per_second": 24.18, |
|
"eval_steps_per_second": 3.022, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028194033861865086, |
|
"loss": 0.5936, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002816178446654125, |
|
"loss": 0.5951, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028129535071217413, |
|
"loss": 0.591, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028097285675893576, |
|
"loss": 0.6245, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028065036280569734, |
|
"loss": 0.5964, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028032786885245903, |
|
"loss": 0.5962, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002800053748992206, |
|
"loss": 0.5983, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027968288094598224, |
|
"loss": 0.5928, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002793603869927439, |
|
"loss": 0.5944, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027903789303950546, |
|
"loss": 0.594, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.6102790236473083, |
|
"eval_runtime": 82.6061, |
|
"eval_samples_per_second": 24.211, |
|
"eval_steps_per_second": 3.026, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027871539908626714, |
|
"loss": 0.592, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002783929051330287, |
|
"loss": 0.5976, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027807041117979036, |
|
"loss": 0.5925, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000277747917226552, |
|
"loss": 0.5939, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027742542327331357, |
|
"loss": 0.5891, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027710292932007526, |
|
"loss": 0.6036, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00027678043536683684, |
|
"loss": 0.5949, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00027645794141359847, |
|
"loss": 0.5862, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002761354474603601, |
|
"loss": 0.6025, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027581295350712174, |
|
"loss": 0.5963, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.6100178360939026, |
|
"eval_runtime": 82.3733, |
|
"eval_samples_per_second": 24.28, |
|
"eval_steps_per_second": 3.035, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002754904595538833, |
|
"loss": 0.6012, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027516796560064495, |
|
"loss": 0.5866, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002748454716474066, |
|
"loss": 0.595, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002745229776941682, |
|
"loss": 0.5929, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00027420048374092985, |
|
"loss": 0.594, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027387798978769143, |
|
"loss": 0.5904, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027355549583445306, |
|
"loss": 0.5878, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002732330018812147, |
|
"loss": 0.5981, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00027291050792797633, |
|
"loss": 0.5958, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00027258801397473797, |
|
"loss": 0.589, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.609899640083313, |
|
"eval_runtime": 83.0673, |
|
"eval_samples_per_second": 24.077, |
|
"eval_steps_per_second": 3.01, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00027226552002149954, |
|
"loss": 0.6015, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002719430260682612, |
|
"loss": 0.5978, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002716205321150228, |
|
"loss": 0.5923, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027129803816178445, |
|
"loss": 0.5933, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002709755442085461, |
|
"loss": 0.5846, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027065305025530766, |
|
"loss": 0.5992, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027033055630206935, |
|
"loss": 0.5943, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002700080623488309, |
|
"loss": 0.5954, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00026968556839559256, |
|
"loss": 0.5931, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002693630744423542, |
|
"loss": 0.5925, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.6094837784767151, |
|
"eval_runtime": 82.8167, |
|
"eval_samples_per_second": 24.15, |
|
"eval_steps_per_second": 3.019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00026904058048911577, |
|
"loss": 0.595, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00026871808653587746, |
|
"loss": 0.5933, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00026839559258263904, |
|
"loss": 0.5958, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002680730986294007, |
|
"loss": 0.5897, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002677506046761623, |
|
"loss": 0.5889, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026742811072292394, |
|
"loss": 0.6007, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002671056167696856, |
|
"loss": 0.591, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026678312281644715, |
|
"loss": 0.5868, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002664606288632088, |
|
"loss": 0.589, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002661381349099704, |
|
"loss": 0.5919, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.6094632148742676, |
|
"eval_runtime": 83.078, |
|
"eval_samples_per_second": 24.074, |
|
"eval_steps_per_second": 3.009, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026581564095673205, |
|
"loss": 0.5983, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026549314700349363, |
|
"loss": 0.5958, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026517065305025527, |
|
"loss": 0.5902, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002648481590970169, |
|
"loss": 0.5953, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026452566514377854, |
|
"loss": 0.5982, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026420317119054017, |
|
"loss": 0.5902, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026388067723730175, |
|
"loss": 0.5899, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002635581832840634, |
|
"loss": 0.5948, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000263235689330825, |
|
"loss": 0.5896, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026291319537758665, |
|
"loss": 0.5922, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.6090299487113953, |
|
"eval_runtime": 82.9058, |
|
"eval_samples_per_second": 24.124, |
|
"eval_steps_per_second": 3.015, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002625907014243483, |
|
"loss": 0.5976, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026226820747110986, |
|
"loss": 0.593, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026194571351787155, |
|
"loss": 0.5987, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026162321956463313, |
|
"loss": 0.5886, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026130072561139476, |
|
"loss": 0.5937, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002609782316581564, |
|
"loss": 0.5878, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000260655737704918, |
|
"loss": 0.5932, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026033324375167966, |
|
"loss": 0.5968, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026001074979844124, |
|
"loss": 0.5925, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002596882558452029, |
|
"loss": 0.5882, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.609171986579895, |
|
"eval_runtime": 82.3867, |
|
"eval_samples_per_second": 24.276, |
|
"eval_steps_per_second": 3.034, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002593657618919645, |
|
"loss": 0.5948, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025904326793872614, |
|
"loss": 0.5928, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002587207739854878, |
|
"loss": 0.6007, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00025839828003224936, |
|
"loss": 0.5954, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000258075786079011, |
|
"loss": 0.5976, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002577532921257726, |
|
"loss": 0.5876, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025743079817253426, |
|
"loss": 0.5906, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002571083042192959, |
|
"loss": 0.5953, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025678581026605747, |
|
"loss": 0.5923, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002564633163128191, |
|
"loss": 0.6015, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 0.6090067028999329, |
|
"eval_runtime": 83.1007, |
|
"eval_samples_per_second": 24.067, |
|
"eval_steps_per_second": 3.008, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025614082235958074, |
|
"loss": 0.5899, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025581832840634237, |
|
"loss": 0.5953, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000255495834453104, |
|
"loss": 0.5948, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002551733404998656, |
|
"loss": 0.5946, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002548508465466272, |
|
"loss": 0.6018, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025452835259338885, |
|
"loss": 0.5957, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002542058586401505, |
|
"loss": 0.5934, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025388336468691207, |
|
"loss": 0.5971, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025356087073367375, |
|
"loss": 0.5903, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025323837678043533, |
|
"loss": 0.594, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.6089543700218201, |
|
"eval_runtime": 83.1486, |
|
"eval_samples_per_second": 24.053, |
|
"eval_steps_per_second": 3.007, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025291588282719697, |
|
"loss": 0.5916, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002525933888739586, |
|
"loss": 0.5914, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002522708949207202, |
|
"loss": 0.5992, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00025194840096748187, |
|
"loss": 0.6012, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025162590701424345, |
|
"loss": 0.5863, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002513034130610051, |
|
"loss": 0.5947, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002509809191077667, |
|
"loss": 0.5998, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002506584251545283, |
|
"loss": 0.5884, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00025033593120129, |
|
"loss": 0.6021, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00025001343724805156, |
|
"loss": 0.5931, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.6089305281639099, |
|
"eval_runtime": 82.7488, |
|
"eval_samples_per_second": 24.17, |
|
"eval_steps_per_second": 3.021, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002496909432948132, |
|
"loss": 0.5944, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00024936844934157483, |
|
"loss": 0.5886, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00024904595538833646, |
|
"loss": 0.5974, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002487234614350981, |
|
"loss": 0.5928, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002484009674818597, |
|
"loss": 0.5881, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002480784735286213, |
|
"loss": 0.5999, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024775597957538294, |
|
"loss": 0.5941, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002474334856221446, |
|
"loss": 0.5971, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002471109916689062, |
|
"loss": 0.5908, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002467884977156678, |
|
"loss": 0.6051, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.6087088584899902, |
|
"eval_runtime": 82.9672, |
|
"eval_samples_per_second": 24.106, |
|
"eval_steps_per_second": 3.013, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002464660037624294, |
|
"loss": 0.5909, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024614350980919106, |
|
"loss": 0.5909, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002458210158559527, |
|
"loss": 0.5898, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002454985219027143, |
|
"loss": 0.5891, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002451760279494759, |
|
"loss": 0.5896, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024485353399623754, |
|
"loss": 0.5899, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024453104004299917, |
|
"loss": 0.5984, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002442085460897608, |
|
"loss": 0.5969, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002438860521365224, |
|
"loss": 0.5908, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024356355818328404, |
|
"loss": 0.5929, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.6087647080421448, |
|
"eval_runtime": 82.8339, |
|
"eval_samples_per_second": 24.145, |
|
"eval_steps_per_second": 3.018, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024324106423004565, |
|
"loss": 0.5976, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002429185702768073, |
|
"loss": 0.5862, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024259607632356892, |
|
"loss": 0.5943, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024227358237033052, |
|
"loss": 0.5852, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024195108841709216, |
|
"loss": 0.5984, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024162859446385376, |
|
"loss": 0.5921, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024130610051061543, |
|
"loss": 0.6005, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024098360655737703, |
|
"loss": 0.5953, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024066111260413864, |
|
"loss": 0.5919, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024033861865090027, |
|
"loss": 0.5948, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.6086417436599731, |
|
"eval_runtime": 82.8938, |
|
"eval_samples_per_second": 24.127, |
|
"eval_steps_per_second": 3.016, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002400161246976619, |
|
"loss": 0.5918, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023969363074442354, |
|
"loss": 0.5874, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023937113679118515, |
|
"loss": 0.5918, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023904864283794675, |
|
"loss": 0.5906, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002387261488847084, |
|
"loss": 0.5976, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023840365493147002, |
|
"loss": 0.5879, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023808116097823163, |
|
"loss": 0.5982, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023775866702499326, |
|
"loss": 0.5969, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023743617307175487, |
|
"loss": 0.5832, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023711367911851653, |
|
"loss": 0.5839, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.6088613271713257, |
|
"eval_runtime": 82.8863, |
|
"eval_samples_per_second": 24.129, |
|
"eval_steps_per_second": 3.016, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023679118516527813, |
|
"loss": 0.5897, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023646869121203974, |
|
"loss": 0.5929, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023614619725880137, |
|
"loss": 0.5914, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000235823703305563, |
|
"loss": 0.5909, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023550120935232464, |
|
"loss": 0.5918, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023517871539908625, |
|
"loss": 0.5917, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023485622144584785, |
|
"loss": 0.5955, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023453372749260951, |
|
"loss": 0.5901, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023421123353937112, |
|
"loss": 0.5876, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023388873958613273, |
|
"loss": 0.5894, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 0.6085715293884277, |
|
"eval_runtime": 83.2471, |
|
"eval_samples_per_second": 24.025, |
|
"eval_steps_per_second": 3.003, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023356624563289436, |
|
"loss": 0.5986, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023324375167965597, |
|
"loss": 0.5943, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023292125772641763, |
|
"loss": 0.5868, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023259876377317924, |
|
"loss": 0.5888, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023227626981994084, |
|
"loss": 0.5974, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023195377586670248, |
|
"loss": 0.5879, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023163128191346408, |
|
"loss": 0.5933, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023130878796022574, |
|
"loss": 0.597, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023098629400698735, |
|
"loss": 0.5946, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023066380005374896, |
|
"loss": 0.5951, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.6085190176963806, |
|
"eval_runtime": 83.1305, |
|
"eval_samples_per_second": 24.059, |
|
"eval_steps_per_second": 3.007, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023034130610051062, |
|
"loss": 0.5906, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023001881214727222, |
|
"loss": 0.5945, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022969631819403386, |
|
"loss": 0.5892, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022937382424079546, |
|
"loss": 0.594, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022905133028755707, |
|
"loss": 0.5931, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022872883633431873, |
|
"loss": 0.5974, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022840634238108034, |
|
"loss": 0.5871, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022808384842784194, |
|
"loss": 0.5919, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022776135447460358, |
|
"loss": 0.5894, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022743886052136518, |
|
"loss": 0.5862, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.6086148023605347, |
|
"eval_runtime": 83.5908, |
|
"eval_samples_per_second": 23.926, |
|
"eval_steps_per_second": 2.991, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022711636656812684, |
|
"loss": 0.5912, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022679387261488845, |
|
"loss": 0.6005, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022647137866165006, |
|
"loss": 0.5891, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002261488847084117, |
|
"loss": 0.5963, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022582639075517332, |
|
"loss": 0.5907, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022550389680193496, |
|
"loss": 0.5948, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022518140284869656, |
|
"loss": 0.5978, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022485890889545817, |
|
"loss": 0.5975, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022453641494221983, |
|
"loss": 0.5934, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022421392098898144, |
|
"loss": 0.592, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 0.6084710359573364, |
|
"eval_runtime": 82.8821, |
|
"eval_samples_per_second": 24.131, |
|
"eval_steps_per_second": 3.016, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022389142703574307, |
|
"loss": 0.5936, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022356893308250468, |
|
"loss": 0.5898, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022324643912926629, |
|
"loss": 0.5895, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022292394517602795, |
|
"loss": 0.5885, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022260145122278955, |
|
"loss": 0.5836, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022227895726955116, |
|
"loss": 0.588, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002219564633163128, |
|
"loss": 0.5933, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022163396936307443, |
|
"loss": 0.6007, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022131147540983606, |
|
"loss": 0.5976, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022098898145659767, |
|
"loss": 0.5886, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.6084069609642029, |
|
"eval_runtime": 83.0728, |
|
"eval_samples_per_second": 24.075, |
|
"eval_steps_per_second": 3.009, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022066648750335927, |
|
"loss": 0.6022, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022034399355012093, |
|
"loss": 0.5898, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022002149959688254, |
|
"loss": 0.5878, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00021969900564364417, |
|
"loss": 0.5958, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021937651169040578, |
|
"loss": 0.5863, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002190540177371674, |
|
"loss": 0.5897, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021873152378392905, |
|
"loss": 0.5893, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021840902983069065, |
|
"loss": 0.5913, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002180865358774523, |
|
"loss": 0.5924, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002177640419242139, |
|
"loss": 0.595, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.6084703207015991, |
|
"eval_runtime": 83.4269, |
|
"eval_samples_per_second": 23.973, |
|
"eval_steps_per_second": 2.997, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021744154797097553, |
|
"loss": 0.5887, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021711905401773716, |
|
"loss": 0.5909, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021679656006449877, |
|
"loss": 0.5935, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021647406611126037, |
|
"loss": 0.602, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021615157215802204, |
|
"loss": 0.5927, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021582907820478364, |
|
"loss": 0.5963, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021550658425154528, |
|
"loss": 0.5897, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021518409029830688, |
|
"loss": 0.5902, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002148615963450685, |
|
"loss": 0.5879, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021453910239183015, |
|
"loss": 0.5894, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.6084801554679871, |
|
"eval_runtime": 83.4424, |
|
"eval_samples_per_second": 23.969, |
|
"eval_steps_per_second": 2.996, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021421660843859176, |
|
"loss": 0.5966, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002138941144853534, |
|
"loss": 0.5912, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.000213571620532115, |
|
"loss": 0.5895, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021324912657887663, |
|
"loss": 0.5961, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021292663262563826, |
|
"loss": 0.5938, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021260413867239987, |
|
"loss": 0.5896, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021228164471916148, |
|
"loss": 0.5999, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021195915076592314, |
|
"loss": 0.595, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021163665681268474, |
|
"loss": 0.5957, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021131416285944638, |
|
"loss": 0.5816, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.6085809469223022, |
|
"eval_runtime": 82.9268, |
|
"eval_samples_per_second": 24.118, |
|
"eval_steps_per_second": 3.015, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021099166890620798, |
|
"loss": 0.597, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002106691749529696, |
|
"loss": 0.5795, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021034668099973125, |
|
"loss": 0.5969, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021002418704649286, |
|
"loss": 0.592, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002097016930932545, |
|
"loss": 0.5941, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002093791991400161, |
|
"loss": 0.5919, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002090567051867777, |
|
"loss": 0.59, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020873421123353937, |
|
"loss": 0.5915, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020841171728030097, |
|
"loss": 0.5967, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002080892233270626, |
|
"loss": 0.5922, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.608452558517456, |
|
"eval_runtime": 83.4452, |
|
"eval_samples_per_second": 23.968, |
|
"eval_steps_per_second": 2.996, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020776672937382424, |
|
"loss": 0.5859, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020744423542058585, |
|
"loss": 0.5895, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020712174146734748, |
|
"loss": 0.5848, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020679924751410909, |
|
"loss": 0.5956, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002064767535608707, |
|
"loss": 0.5929, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020615425960763235, |
|
"loss": 0.5888, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020583176565439396, |
|
"loss": 0.5893, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002055092717011556, |
|
"loss": 0.5937, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002051867777479172, |
|
"loss": 0.5981, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002048642837946788, |
|
"loss": 0.5964, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.6083669066429138, |
|
"eval_runtime": 83.3491, |
|
"eval_samples_per_second": 23.995, |
|
"eval_steps_per_second": 2.999, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020454178984144047, |
|
"loss": 0.5951, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020421929588820207, |
|
"loss": 0.5915, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002038968019349637, |
|
"loss": 0.5914, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020357430798172531, |
|
"loss": 0.5957, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020325181402848695, |
|
"loss": 0.5885, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020292932007524858, |
|
"loss": 0.5899, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002026068261220102, |
|
"loss": 0.5978, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020228433216877182, |
|
"loss": 0.5946, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020196183821553345, |
|
"loss": 0.5984, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020163934426229506, |
|
"loss": 0.5872, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.6081907749176025, |
|
"eval_runtime": 83.4032, |
|
"eval_samples_per_second": 23.98, |
|
"eval_steps_per_second": 2.997, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002013168503090567, |
|
"loss": 0.5918, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002009943563558183, |
|
"loss": 0.5899, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002006718624025799, |
|
"loss": 0.5901, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020034936844934157, |
|
"loss": 0.5927, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020002687449610318, |
|
"loss": 0.5996, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001997043805428648, |
|
"loss": 0.5959, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019938188658962642, |
|
"loss": 0.5929, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019905939263638805, |
|
"loss": 0.5917, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019873689868314968, |
|
"loss": 0.5935, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001984144047299113, |
|
"loss": 0.5955, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.6082538366317749, |
|
"eval_runtime": 83.3309, |
|
"eval_samples_per_second": 24.001, |
|
"eval_steps_per_second": 3.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019809191077667292, |
|
"loss": 0.5849, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019776941682343456, |
|
"loss": 0.5927, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019744692287019616, |
|
"loss": 0.5883, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001971244289169578, |
|
"loss": 0.5921, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001968019349637194, |
|
"loss": 0.5884, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000196479441010481, |
|
"loss": 0.5835, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019615694705724267, |
|
"loss": 0.5918, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019583445310400428, |
|
"loss": 0.5949, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001955119591507659, |
|
"loss": 0.5908, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019518946519752752, |
|
"loss": 0.5895, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 0.6080419421195984, |
|
"eval_runtime": 83.5935, |
|
"eval_samples_per_second": 23.925, |
|
"eval_steps_per_second": 2.991, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019486697124428915, |
|
"loss": 0.5918, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019454447729105078, |
|
"loss": 0.5891, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001942219833378124, |
|
"loss": 0.5909, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019389948938457402, |
|
"loss": 0.5981, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019357699543133566, |
|
"loss": 0.5905, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019325450147809726, |
|
"loss": 0.5913, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001929320075248589, |
|
"loss": 0.5885, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001926095135716205, |
|
"loss": 0.5967, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019228701961838217, |
|
"loss": 0.5907, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019196452566514377, |
|
"loss": 0.5914, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.6082321405410767, |
|
"eval_runtime": 83.2497, |
|
"eval_samples_per_second": 24.024, |
|
"eval_steps_per_second": 3.003, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019164203171190538, |
|
"loss": 0.5842, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.000191319537758667, |
|
"loss": 0.5925, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019099704380542862, |
|
"loss": 0.5858, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019067454985219025, |
|
"loss": 0.5875, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019035205589895189, |
|
"loss": 0.5916, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001900295619457135, |
|
"loss": 0.5972, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018970706799247513, |
|
"loss": 0.5869, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018938457403923676, |
|
"loss": 0.5937, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018906208008599837, |
|
"loss": 0.5942, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018873958613276, |
|
"loss": 0.5852, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.6080183386802673, |
|
"eval_runtime": 83.8653, |
|
"eval_samples_per_second": 23.848, |
|
"eval_steps_per_second": 2.981, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001884170921795216, |
|
"loss": 0.5909, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018809459822628327, |
|
"loss": 0.5968, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018777210427304487, |
|
"loss": 0.5896, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018744961031980648, |
|
"loss": 0.5935, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018712711636656811, |
|
"loss": 0.5935, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018680462241332972, |
|
"loss": 0.5909, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018648212846009138, |
|
"loss": 0.5914, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.000186159634506853, |
|
"loss": 0.5926, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001858371405536146, |
|
"loss": 0.5914, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018551464660037623, |
|
"loss": 0.595, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.6081690788269043, |
|
"eval_runtime": 83.4491, |
|
"eval_samples_per_second": 23.967, |
|
"eval_steps_per_second": 2.996, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018519215264713786, |
|
"loss": 0.587, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018486965869389947, |
|
"loss": 0.5877, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001845471647406611, |
|
"loss": 0.5988, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001842246707874227, |
|
"loss": 0.5959, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018390217683418437, |
|
"loss": 0.5986, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018357968288094598, |
|
"loss": 0.5864, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018325718892770758, |
|
"loss": 0.5915, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018293469497446922, |
|
"loss": 0.5893, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018261220102123082, |
|
"loss": 0.5926, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018228970706799248, |
|
"loss": 0.5965, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.6080557703971863, |
|
"eval_runtime": 83.3607, |
|
"eval_samples_per_second": 23.992, |
|
"eval_steps_per_second": 2.999, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001819672131147541, |
|
"loss": 0.5909, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001816447191615157, |
|
"loss": 0.595, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018132222520827733, |
|
"loss": 0.5918, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018099973125503894, |
|
"loss": 0.5898, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018067723730180057, |
|
"loss": 0.5931, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001803547433485622, |
|
"loss": 0.5887, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001800322493953238, |
|
"loss": 0.5927, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017970975544208544, |
|
"loss": 0.5911, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017938726148884708, |
|
"loss": 0.5916, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017906476753560868, |
|
"loss": 0.5931, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.607985258102417, |
|
"eval_runtime": 83.5156, |
|
"eval_samples_per_second": 23.948, |
|
"eval_steps_per_second": 2.993, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017874227358237032, |
|
"loss": 0.5903, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017841977962913192, |
|
"loss": 0.5943, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017809728567589358, |
|
"loss": 0.5976, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001777747917226552, |
|
"loss": 0.5954, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001774522977694168, |
|
"loss": 0.5919, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017712980381617843, |
|
"loss": 0.5867, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017680730986294004, |
|
"loss": 0.592, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001764848159097017, |
|
"loss": 0.5919, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001761623219564633, |
|
"loss": 0.5943, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001758398280032249, |
|
"loss": 0.592, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.6081468462944031, |
|
"eval_runtime": 83.7403, |
|
"eval_samples_per_second": 23.883, |
|
"eval_steps_per_second": 2.985, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017551733404998655, |
|
"loss": 0.595, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017519484009674818, |
|
"loss": 0.5908, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017487234614350979, |
|
"loss": 0.5953, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017454985219027142, |
|
"loss": 0.5939, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017422735823703303, |
|
"loss": 0.5949, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001739048642837947, |
|
"loss": 0.597, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001735823703305563, |
|
"loss": 0.5987, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001732598763773179, |
|
"loss": 0.5976, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017293738242407953, |
|
"loss": 0.5892, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017261488847084114, |
|
"loss": 0.5919, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.6079828143119812, |
|
"eval_runtime": 83.5244, |
|
"eval_samples_per_second": 23.945, |
|
"eval_steps_per_second": 2.993, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001722923945176028, |
|
"loss": 0.5977, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001719699005643644, |
|
"loss": 0.5958, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.000171647406611126, |
|
"loss": 0.5888, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017132491265788765, |
|
"loss": 0.5953, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017100241870464928, |
|
"loss": 0.5902, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017067992475141091, |
|
"loss": 0.5936, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017035743079817252, |
|
"loss": 0.5918, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017003493684493413, |
|
"loss": 0.5952, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001697124428916958, |
|
"loss": 0.588, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001693899489384574, |
|
"loss": 0.5804, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.6079012155532837, |
|
"eval_runtime": 83.6243, |
|
"eval_samples_per_second": 23.916, |
|
"eval_steps_per_second": 2.99, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.000169067454985219, |
|
"loss": 0.5991, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016874496103198063, |
|
"loss": 0.5937, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016842246707874224, |
|
"loss": 0.5914, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001680999731255039, |
|
"loss": 0.592, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001677774791722655, |
|
"loss": 0.5897, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016745498521902712, |
|
"loss": 0.5922, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016713249126578875, |
|
"loss": 0.5944, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016680999731255038, |
|
"loss": 0.5925, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016648750335931202, |
|
"loss": 0.5904, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016616500940607362, |
|
"loss": 0.5908, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.6079640984535217, |
|
"eval_runtime": 83.4353, |
|
"eval_samples_per_second": 23.971, |
|
"eval_steps_per_second": 2.996, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016584251545283523, |
|
"loss": 0.5949, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001655200214995969, |
|
"loss": 0.5898, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001651975275463585, |
|
"loss": 0.5881, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001648750335931201, |
|
"loss": 0.5911, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016455253963988174, |
|
"loss": 0.5918, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016423004568664334, |
|
"loss": 0.5928, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.000163907551733405, |
|
"loss": 0.5916, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001635850577801666, |
|
"loss": 0.5903, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016326256382692822, |
|
"loss": 0.5897, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016294006987368985, |
|
"loss": 0.5969, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.6079634428024292, |
|
"eval_runtime": 83.8122, |
|
"eval_samples_per_second": 23.863, |
|
"eval_steps_per_second": 2.983, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016261757592045146, |
|
"loss": 0.587, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016229508196721312, |
|
"loss": 0.5929, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016197258801397472, |
|
"loss": 0.5914, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016165009406073633, |
|
"loss": 0.5915, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.000161327600107498, |
|
"loss": 0.5874, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001610051061542596, |
|
"loss": 0.5862, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016068261220102123, |
|
"loss": 0.594, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016036011824778284, |
|
"loss": 0.5928, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016003762429454444, |
|
"loss": 0.5861, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001597151303413061, |
|
"loss": 0.5932, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.6077998280525208, |
|
"eval_runtime": 83.6323, |
|
"eval_samples_per_second": 23.914, |
|
"eval_steps_per_second": 2.989, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001593926363880677, |
|
"loss": 0.5961, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015907014243482932, |
|
"loss": 0.5913, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015874764848159095, |
|
"loss": 0.5963, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015842515452835256, |
|
"loss": 0.5927, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015810266057511422, |
|
"loss": 0.585, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015778016662187583, |
|
"loss": 0.5898, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015745767266863743, |
|
"loss": 0.5958, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015713517871539907, |
|
"loss": 0.5832, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001568126847621607, |
|
"loss": 0.591, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015649019080892233, |
|
"loss": 0.594, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.6079277396202087, |
|
"eval_runtime": 83.6924, |
|
"eval_samples_per_second": 23.897, |
|
"eval_steps_per_second": 2.987, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015616769685568394, |
|
"loss": 0.5982, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015584520290244555, |
|
"loss": 0.5918, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001555227089492072, |
|
"loss": 0.5949, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015520021499596881, |
|
"loss": 0.5894, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015487772104273045, |
|
"loss": 0.5887, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015455522708949205, |
|
"loss": 0.5896, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015423273313625366, |
|
"loss": 0.5933, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015391023918301532, |
|
"loss": 0.5961, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015358774522977693, |
|
"loss": 0.586, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015326525127653853, |
|
"loss": 0.5921, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.607754111289978, |
|
"eval_runtime": 83.89, |
|
"eval_samples_per_second": 23.841, |
|
"eval_steps_per_second": 2.98, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015294275732330017, |
|
"loss": 0.5973, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001526202633700618, |
|
"loss": 0.5917, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015229776941682344, |
|
"loss": 0.5932, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015197527546358504, |
|
"loss": 0.5966, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015165278151034665, |
|
"loss": 0.5984, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001513302875571083, |
|
"loss": 0.5951, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015100779360386992, |
|
"loss": 0.5863, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015068529965063155, |
|
"loss": 0.593, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015036280569739316, |
|
"loss": 0.5906, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015004031174415476, |
|
"loss": 0.5936, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.6078041791915894, |
|
"eval_runtime": 83.6959, |
|
"eval_samples_per_second": 23.896, |
|
"eval_steps_per_second": 2.987, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001497178177909164, |
|
"loss": 0.5899, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014939532383767803, |
|
"loss": 0.5911, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014907282988443966, |
|
"loss": 0.5881, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014875033593120127, |
|
"loss": 0.5879, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001484278419779629, |
|
"loss": 0.5883, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001481053480247245, |
|
"loss": 0.5974, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014778285407148614, |
|
"loss": 0.5992, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014746036011824778, |
|
"loss": 0.5892, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001471378661650094, |
|
"loss": 0.5873, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014681537221177102, |
|
"loss": 0.5905, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 0.607796847820282, |
|
"eval_runtime": 83.56, |
|
"eval_samples_per_second": 23.935, |
|
"eval_steps_per_second": 2.992, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014649287825853262, |
|
"loss": 0.589, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014617038430529426, |
|
"loss": 0.5866, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001458478903520559, |
|
"loss": 0.5895, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014552539639881752, |
|
"loss": 0.592, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014520290244557913, |
|
"loss": 0.5954, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014488040849234076, |
|
"loss": 0.591, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014455791453910237, |
|
"loss": 0.5925, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.000144235420585864, |
|
"loss": 0.5938, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001439129266326256, |
|
"loss": 0.5928, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014359043267938725, |
|
"loss": 0.5979, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.6077319383621216, |
|
"eval_runtime": 83.3796, |
|
"eval_samples_per_second": 23.987, |
|
"eval_steps_per_second": 2.998, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014326793872614888, |
|
"loss": 0.5922, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001429454447729105, |
|
"loss": 0.5865, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014262295081967212, |
|
"loss": 0.5845, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014230045686643373, |
|
"loss": 0.5896, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014197796291319536, |
|
"loss": 0.5905, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.000141655468959957, |
|
"loss": 0.5875, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014133297500671863, |
|
"loss": 0.5936, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014101048105348023, |
|
"loss": 0.6014, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014068798710024187, |
|
"loss": 0.5898, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014036549314700347, |
|
"loss": 0.5895, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.6077406406402588, |
|
"eval_runtime": 83.5244, |
|
"eval_samples_per_second": 23.945, |
|
"eval_steps_per_second": 2.993, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001400429991937651, |
|
"loss": 0.5883, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001397205052405267, |
|
"loss": 0.599, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013939801128728835, |
|
"loss": 0.5964, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013907551733404998, |
|
"loss": 0.5924, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013875302338081161, |
|
"loss": 0.5866, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013843052942757322, |
|
"loss": 0.5946, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013810803547433483, |
|
"loss": 0.5953, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013778554152109646, |
|
"loss": 0.5992, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001374630475678581, |
|
"loss": 0.6, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013714055361461973, |
|
"loss": 0.596, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.607699453830719, |
|
"eval_runtime": 83.655, |
|
"eval_samples_per_second": 23.908, |
|
"eval_steps_per_second": 2.988, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013681805966138133, |
|
"loss": 0.5904, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013649556570814297, |
|
"loss": 0.5906, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013617307175490457, |
|
"loss": 0.5939, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001358505778016662, |
|
"loss": 0.5854, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013552808384842784, |
|
"loss": 0.5917, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013520558989518945, |
|
"loss": 0.5966, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013488309594195108, |
|
"loss": 0.5853, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0001345606019887127, |
|
"loss": 0.5905, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013423810803547432, |
|
"loss": 0.5954, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013391561408223593, |
|
"loss": 0.5992, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.6076706051826477, |
|
"eval_runtime": 83.9433, |
|
"eval_samples_per_second": 23.826, |
|
"eval_steps_per_second": 2.978, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013359312012899756, |
|
"loss": 0.5816, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001332706261757592, |
|
"loss": 0.5885, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013294813222252083, |
|
"loss": 0.592, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013262563826928244, |
|
"loss": 0.59, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013230314431604407, |
|
"loss": 0.5903, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013198065036280568, |
|
"loss": 0.5902, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0001316581564095673, |
|
"loss": 0.5949, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013133566245632894, |
|
"loss": 0.5869, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013101316850309055, |
|
"loss": 0.5945, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013069067454985218, |
|
"loss": 0.5964, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.607550859451294, |
|
"eval_runtime": 83.5845, |
|
"eval_samples_per_second": 23.928, |
|
"eval_steps_per_second": 2.991, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001303681805966138, |
|
"loss": 0.5986, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013004568664337542, |
|
"loss": 0.6024, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012972319269013706, |
|
"loss": 0.5901, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012940069873689866, |
|
"loss": 0.5902, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001290782047836603, |
|
"loss": 0.5921, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012875571083042193, |
|
"loss": 0.5915, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012843321687718354, |
|
"loss": 0.5952, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012811072292394514, |
|
"loss": 0.5907, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012778822897070678, |
|
"loss": 0.5959, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001274657350174684, |
|
"loss": 0.5884, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.6075096726417542, |
|
"eval_runtime": 83.2167, |
|
"eval_samples_per_second": 24.034, |
|
"eval_steps_per_second": 3.004, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012714324106423005, |
|
"loss": 0.5959, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012682074711099165, |
|
"loss": 0.5841, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012649825315775329, |
|
"loss": 0.5881, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001261757592045149, |
|
"loss": 0.5845, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012585326525127653, |
|
"loss": 0.5918, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012553077129803816, |
|
"loss": 0.5886, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012520827734479977, |
|
"loss": 0.5941, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001248857833915614, |
|
"loss": 0.5949, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012456328943832303, |
|
"loss": 0.5906, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012424079548508464, |
|
"loss": 0.5963, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.6074970960617065, |
|
"eval_runtime": 83.4435, |
|
"eval_samples_per_second": 23.968, |
|
"eval_steps_per_second": 2.996, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012391830153184625, |
|
"loss": 0.5951, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012359580757860788, |
|
"loss": 0.5896, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012327331362536951, |
|
"loss": 0.5826, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012295081967213115, |
|
"loss": 0.5885, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012262832571889275, |
|
"loss": 0.5896, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001223058317656544, |
|
"loss": 0.5921, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012198333781241601, |
|
"loss": 0.588, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012166084385917763, |
|
"loss": 0.5885, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012133834990593926, |
|
"loss": 0.5969, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012101585595270087, |
|
"loss": 0.5908, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.6075094938278198, |
|
"eval_runtime": 83.5597, |
|
"eval_samples_per_second": 23.935, |
|
"eval_steps_per_second": 2.992, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001206933619994625, |
|
"loss": 0.5908, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012037086804622412, |
|
"loss": 0.5925, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012004837409298576, |
|
"loss": 0.5966, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011972588013974738, |
|
"loss": 0.5862, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011940338618650898, |
|
"loss": 0.5927, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011908089223327062, |
|
"loss": 0.598, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011875839828003224, |
|
"loss": 0.582, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011843590432679387, |
|
"loss": 0.5982, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011811341037355548, |
|
"loss": 0.5972, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011779091642031711, |
|
"loss": 0.5999, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.6074954867362976, |
|
"eval_runtime": 83.1861, |
|
"eval_samples_per_second": 24.042, |
|
"eval_steps_per_second": 3.005, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011746842246707873, |
|
"loss": 0.5943, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011714592851384036, |
|
"loss": 0.5895, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011682343456060198, |
|
"loss": 0.5908, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011650094060736359, |
|
"loss": 0.5925, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011617844665412522, |
|
"loss": 0.5886, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011585595270088686, |
|
"loss": 0.5832, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011553345874764848, |
|
"loss": 0.5889, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011521096479441008, |
|
"loss": 0.5895, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011488847084117172, |
|
"loss": 0.5901, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011456597688793334, |
|
"loss": 0.593, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.6075332760810852, |
|
"eval_runtime": 83.8045, |
|
"eval_samples_per_second": 23.865, |
|
"eval_steps_per_second": 2.983, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011424348293469497, |
|
"loss": 0.5991, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011392098898145659, |
|
"loss": 0.5968, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011359849502821821, |
|
"loss": 0.5929, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011327600107497983, |
|
"loss": 0.5917, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011295350712174146, |
|
"loss": 0.5948, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011263101316850308, |
|
"loss": 0.5903, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011230851921526469, |
|
"loss": 0.5895, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011198602526202633, |
|
"loss": 0.5956, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011166353130878795, |
|
"loss": 0.5847, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011134103735554958, |
|
"loss": 0.585, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.6074097156524658, |
|
"eval_runtime": 83.3467, |
|
"eval_samples_per_second": 23.996, |
|
"eval_steps_per_second": 3.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001110185434023112, |
|
"loss": 0.5937, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011069604944907282, |
|
"loss": 0.5911, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011037355549583444, |
|
"loss": 0.5962, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00011005106154259607, |
|
"loss": 0.5973, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010972856758935769, |
|
"loss": 0.5844, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010940607363611931, |
|
"loss": 0.5893, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010908357968288093, |
|
"loss": 0.5852, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010876108572964257, |
|
"loss": 0.5968, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010843859177640419, |
|
"loss": 0.5973, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010811609782316579, |
|
"loss": 0.5824, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.6074831485748291, |
|
"eval_runtime": 83.2845, |
|
"eval_samples_per_second": 24.014, |
|
"eval_steps_per_second": 3.002, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010779360386992743, |
|
"loss": 0.5889, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010747110991668905, |
|
"loss": 0.5848, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010714861596345068, |
|
"loss": 0.5851, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001068261220102123, |
|
"loss": 0.6018, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010650362805697392, |
|
"loss": 0.5931, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010618113410373554, |
|
"loss": 0.5908, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010585864015049717, |
|
"loss": 0.5878, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001055361461972588, |
|
"loss": 0.5943, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001052136522440204, |
|
"loss": 0.589, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010489115829078203, |
|
"loss": 0.5985, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 0.6074686646461487, |
|
"eval_runtime": 83.5238, |
|
"eval_samples_per_second": 23.945, |
|
"eval_steps_per_second": 2.993, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010456866433754367, |
|
"loss": 0.5897, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010424617038430529, |
|
"loss": 0.5926, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010392367643106692, |
|
"loss": 0.5931, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010360118247782853, |
|
"loss": 0.5956, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010327868852459015, |
|
"loss": 0.5889, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010295619457135178, |
|
"loss": 0.5945, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001026337006181134, |
|
"loss": 0.5881, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010231120666487502, |
|
"loss": 0.595, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010198871271163664, |
|
"loss": 0.589, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010166621875839828, |
|
"loss": 0.5886, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.6073445081710815, |
|
"eval_runtime": 83.2878, |
|
"eval_samples_per_second": 24.013, |
|
"eval_steps_per_second": 3.002, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001013437248051599, |
|
"loss": 0.5883, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010102123085192153, |
|
"loss": 0.5972, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010069873689868314, |
|
"loss": 0.5963, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010037624294544476, |
|
"loss": 0.5926, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010005374899220639, |
|
"loss": 0.5919, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.973125503896801e-05, |
|
"loss": 0.5898, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.940876108572963e-05, |
|
"loss": 0.5908, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.908626713249125e-05, |
|
"loss": 0.5925, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.876377317925288e-05, |
|
"loss": 0.5912, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.84412792260145e-05, |
|
"loss": 0.59, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 0.6073500514030457, |
|
"eval_runtime": 83.3951, |
|
"eval_samples_per_second": 23.982, |
|
"eval_steps_per_second": 2.998, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.811878527277614e-05, |
|
"loss": 0.596, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.779629131953774e-05, |
|
"loss": 0.5913, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.747379736629938e-05, |
|
"loss": 0.5926, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.7151303413061e-05, |
|
"loss": 0.591, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.682880945982263e-05, |
|
"loss": 0.5871, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.650631550658424e-05, |
|
"loss": 0.5918, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.618382155334586e-05, |
|
"loss": 0.5919, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.586132760010749e-05, |
|
"loss": 0.5982, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.553883364686911e-05, |
|
"loss": 0.5947, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.521633969363075e-05, |
|
"loss": 0.5904, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.607369601726532, |
|
"eval_runtime": 83.4201, |
|
"eval_samples_per_second": 23.975, |
|
"eval_steps_per_second": 2.997, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.489384574039235e-05, |
|
"loss": 0.5936, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.457135178715399e-05, |
|
"loss": 0.5836, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.42488578339156e-05, |
|
"loss": 0.59, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.392636388067724e-05, |
|
"loss": 0.5928, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.360386992743885e-05, |
|
"loss": 0.5921, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.328137597420048e-05, |
|
"loss": 0.5919, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.29588820209621e-05, |
|
"loss": 0.5966, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.263638806772373e-05, |
|
"loss": 0.597, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.231389411448534e-05, |
|
"loss": 0.5932, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.199140016124696e-05, |
|
"loss": 0.5884, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.6073188185691833, |
|
"eval_runtime": 83.7205, |
|
"eval_samples_per_second": 23.889, |
|
"eval_steps_per_second": 2.986, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.16689062080086e-05, |
|
"loss": 0.588, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.134641225477021e-05, |
|
"loss": 0.5908, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.102391830153185e-05, |
|
"loss": 0.589, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.070142434829345e-05, |
|
"loss": 0.5867, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.037893039505509e-05, |
|
"loss": 0.5987, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.005643644181671e-05, |
|
"loss": 0.5874, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.973394248857834e-05, |
|
"loss": 0.5894, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.941144853533995e-05, |
|
"loss": 0.5939, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.908895458210157e-05, |
|
"loss": 0.5887, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.87664606288632e-05, |
|
"loss": 0.5861, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 0.607224702835083, |
|
"eval_runtime": 83.4876, |
|
"eval_samples_per_second": 23.956, |
|
"eval_steps_per_second": 2.994, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.844396667562482e-05, |
|
"loss": 0.5897, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.812147272238646e-05, |
|
"loss": 0.5954, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.779897876914806e-05, |
|
"loss": 0.5968, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.74764848159097e-05, |
|
"loss": 0.5912, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.715399086267132e-05, |
|
"loss": 0.5959, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.683149690943295e-05, |
|
"loss": 0.5905, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.650900295619456e-05, |
|
"loss": 0.5954, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.618650900295619e-05, |
|
"loss": 0.5889, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.586401504971781e-05, |
|
"loss": 0.5895, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.554152109647944e-05, |
|
"loss": 0.5852, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.6072360873222351, |
|
"eval_runtime": 83.3501, |
|
"eval_samples_per_second": 23.995, |
|
"eval_steps_per_second": 2.999, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.521902714324106e-05, |
|
"loss": 0.5812, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.489653319000267e-05, |
|
"loss": 0.5899, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.45740392367643e-05, |
|
"loss": 0.5825, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.425154528352592e-05, |
|
"loss": 0.5917, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.392905133028756e-05, |
|
"loss": 0.5887, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.360655737704916e-05, |
|
"loss": 0.59, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.32840634238108e-05, |
|
"loss": 0.5906, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.296156947057242e-05, |
|
"loss": 0.5909, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.263907551733405e-05, |
|
"loss": 0.5883, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.231658156409567e-05, |
|
"loss": 0.5919, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.6071310639381409, |
|
"eval_runtime": 83.4575, |
|
"eval_samples_per_second": 23.964, |
|
"eval_steps_per_second": 2.996, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.199408761085728e-05, |
|
"loss": 0.5927, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.167159365761891e-05, |
|
"loss": 0.5922, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.134909970438054e-05, |
|
"loss": 0.5946, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.102660575114216e-05, |
|
"loss": 0.5834, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.070411179790377e-05, |
|
"loss": 0.5884, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.03816178446654e-05, |
|
"loss": 0.587, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.005912389142703e-05, |
|
"loss": 0.5948, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.973662993818866e-05, |
|
"loss": 0.5888, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.941413598495028e-05, |
|
"loss": 0.5885, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.90916420317119e-05, |
|
"loss": 0.5929, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 0.6071040034294128, |
|
"eval_runtime": 83.4169, |
|
"eval_samples_per_second": 23.976, |
|
"eval_steps_per_second": 2.997, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.876914807847352e-05, |
|
"loss": 0.5989, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.844665412523515e-05, |
|
"loss": 0.5933, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.812416017199677e-05, |
|
"loss": 0.5899, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.780166621875838e-05, |
|
"loss": 0.593, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.747917226552001e-05, |
|
"loss": 0.5993, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.715667831228163e-05, |
|
"loss": 0.5907, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.683418435904327e-05, |
|
"loss": 0.5895, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.651169040580487e-05, |
|
"loss": 0.5946, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.61891964525665e-05, |
|
"loss": 0.5914, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.586670249932813e-05, |
|
"loss": 0.5874, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.6071457266807556, |
|
"eval_runtime": 83.3637, |
|
"eval_samples_per_second": 23.991, |
|
"eval_steps_per_second": 2.999, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.554420854608976e-05, |
|
"loss": 0.5882, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.522171459285138e-05, |
|
"loss": 0.5903, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.4899220639613e-05, |
|
"loss": 0.59, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.457672668637462e-05, |
|
"loss": 0.5951, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.425423273313625e-05, |
|
"loss": 0.5876, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.393173877989786e-05, |
|
"loss": 0.5961, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.36092448266595e-05, |
|
"loss": 0.589, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.328675087342111e-05, |
|
"loss": 0.5836, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.296425692018273e-05, |
|
"loss": 0.5883, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.264176296694437e-05, |
|
"loss": 0.5927, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.6070078015327454, |
|
"eval_runtime": 83.5563, |
|
"eval_samples_per_second": 23.936, |
|
"eval_steps_per_second": 2.992, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.231926901370599e-05, |
|
"loss": 0.5895, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.199677506046761e-05, |
|
"loss": 0.593, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.167428110722923e-05, |
|
"loss": 0.5854, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.135178715399086e-05, |
|
"loss": 0.5887, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.102929320075248e-05, |
|
"loss": 0.5911, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.07067992475141e-05, |
|
"loss": 0.5878, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.038430529427572e-05, |
|
"loss": 0.5913, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.006181134103736e-05, |
|
"loss": 0.5868, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.973931738779898e-05, |
|
"loss": 0.5978, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.94168234345606e-05, |
|
"loss": 0.5927, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.6069301962852478, |
|
"eval_runtime": 83.3785, |
|
"eval_samples_per_second": 23.987, |
|
"eval_steps_per_second": 2.998, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.909432948132222e-05, |
|
"loss": 0.5838, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.877183552808384e-05, |
|
"loss": 0.5929, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.844934157484547e-05, |
|
"loss": 0.5853, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.812684762160709e-05, |
|
"loss": 0.593, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.780435366836871e-05, |
|
"loss": 0.587, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.748185971513033e-05, |
|
"loss": 0.5901, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.715936576189196e-05, |
|
"loss": 0.5871, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.683687180865358e-05, |
|
"loss": 0.5955, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.65143778554152e-05, |
|
"loss": 0.5886, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.619188390217684e-05, |
|
"loss": 0.5918, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 0.6069862842559814, |
|
"eval_runtime": 83.2688, |
|
"eval_samples_per_second": 24.019, |
|
"eval_steps_per_second": 3.002, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.586938994893844e-05, |
|
"loss": 0.5891, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.554689599570008e-05, |
|
"loss": 0.5892, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.52244020424617e-05, |
|
"loss": 0.5855, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.490190808922332e-05, |
|
"loss": 0.5919, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.457941413598494e-05, |
|
"loss": 0.5895, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.425692018274657e-05, |
|
"loss": 0.5938, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.393442622950819e-05, |
|
"loss": 0.5935, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.361193227626981e-05, |
|
"loss": 0.5913, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.328943832303145e-05, |
|
"loss": 0.5898, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.296694436979307e-05, |
|
"loss": 0.5945, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.6069702506065369, |
|
"eval_runtime": 83.2944, |
|
"eval_samples_per_second": 24.011, |
|
"eval_steps_per_second": 3.001, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.264445041655469e-05, |
|
"loss": 0.5912, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.23219564633163e-05, |
|
"loss": 0.5909, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.199946251007793e-05, |
|
"loss": 0.5871, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.167696855683955e-05, |
|
"loss": 0.5889, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.135447460360118e-05, |
|
"loss": 0.5879, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.10319806503628e-05, |
|
"loss": 0.5912, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.0709486697124426e-05, |
|
"loss": 0.5874, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.038699274388605e-05, |
|
"loss": 0.5885, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.0080623488309586e-05, |
|
"loss": 0.5876, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.975812953507121e-05, |
|
"loss": 0.5849, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.6070013046264648, |
|
"eval_runtime": 83.6124, |
|
"eval_samples_per_second": 23.92, |
|
"eval_steps_per_second": 2.99, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.943563558183283e-05, |
|
"loss": 0.5854, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.911314162859446e-05, |
|
"loss": 0.588, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.879064767535608e-05, |
|
"loss": 0.5884, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.846815372211771e-05, |
|
"loss": 0.5893, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8145659768879334e-05, |
|
"loss": 0.5875, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.7823165815640954e-05, |
|
"loss": 0.5866, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.750067186240258e-05, |
|
"loss": 0.5929, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.7178177909164194e-05, |
|
"loss": 0.59, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.685568395592582e-05, |
|
"loss": 0.595, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.653319000268744e-05, |
|
"loss": 0.5941, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.6068729758262634, |
|
"eval_runtime": 83.4028, |
|
"eval_samples_per_second": 23.98, |
|
"eval_steps_per_second": 2.998, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.621069604944907e-05, |
|
"loss": 0.593, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.588820209621069e-05, |
|
"loss": 0.5893, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.5565708142972315e-05, |
|
"loss": 0.5835, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.5243214189733935e-05, |
|
"loss": 0.5869, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.492072023649556e-05, |
|
"loss": 0.5882, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.459822628325719e-05, |
|
"loss": 0.5942, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.427573233001881e-05, |
|
"loss": 0.5818, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.3953238376780436e-05, |
|
"loss": 0.5974, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.363074442354205e-05, |
|
"loss": 0.5935, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3308250470303676e-05, |
|
"loss": 0.5897, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.6068193912506104, |
|
"eval_runtime": 83.4516, |
|
"eval_samples_per_second": 23.966, |
|
"eval_steps_per_second": 2.996, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.2985756517065296e-05, |
|
"loss": 0.5887, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.266326256382692e-05, |
|
"loss": 0.5904, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.234076861058854e-05, |
|
"loss": 0.5887, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.201827465735017e-05, |
|
"loss": 0.5854, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1695780704111797e-05, |
|
"loss": 0.5868, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.137328675087342e-05, |
|
"loss": 0.5922, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1050792797635044e-05, |
|
"loss": 0.5914, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0728298844396664e-05, |
|
"loss": 0.5973, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.040580489115829e-05, |
|
"loss": 0.5882, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0083310937919904e-05, |
|
"loss": 0.5967, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.6067527532577515, |
|
"eval_runtime": 83.0971, |
|
"eval_samples_per_second": 24.068, |
|
"eval_steps_per_second": 3.009, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.976081698468154e-05, |
|
"loss": 0.5884, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.943832303144315e-05, |
|
"loss": 0.5865, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.911582907820478e-05, |
|
"loss": 0.5855, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.8793335124966404e-05, |
|
"loss": 0.5926, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.8470841171728025e-05, |
|
"loss": 0.5929, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.814834721848965e-05, |
|
"loss": 0.5997, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.782585326525127e-05, |
|
"loss": 0.592, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.75033593120129e-05, |
|
"loss": 0.5934, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.718086535877452e-05, |
|
"loss": 0.5946, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.6858371405536145e-05, |
|
"loss": 0.5881, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.6067208647727966, |
|
"eval_runtime": 83.4992, |
|
"eval_samples_per_second": 23.952, |
|
"eval_steps_per_second": 2.994, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6535877452297765e-05, |
|
"loss": 0.5882, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.621338349905939e-05, |
|
"loss": 0.5938, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.5890889545821006e-05, |
|
"loss": 0.5908, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.556839559258263e-05, |
|
"loss": 0.5964, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.524590163934426e-05, |
|
"loss": 0.5909, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.492340768610588e-05, |
|
"loss": 0.5856, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4600913732867506e-05, |
|
"loss": 0.5856, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4278419779629126e-05, |
|
"loss": 0.5938, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.395592582639075e-05, |
|
"loss": 0.5912, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.363343187315237e-05, |
|
"loss": 0.5941, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.6066766381263733, |
|
"eval_runtime": 83.8427, |
|
"eval_samples_per_second": 23.854, |
|
"eval_steps_per_second": 2.982, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.3310937919914e-05, |
|
"loss": 0.5888, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.298844396667562e-05, |
|
"loss": 0.5816, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.266595001343725e-05, |
|
"loss": 0.5924, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2343456060198874e-05, |
|
"loss": 0.6001, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.202096210696049e-05, |
|
"loss": 0.5897, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.1698468153722114e-05, |
|
"loss": 0.5932, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.1375974200483734e-05, |
|
"loss": 0.5894, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.105348024724536e-05, |
|
"loss": 0.5895, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.073098629400698e-05, |
|
"loss": 0.5943, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.040849234076861e-05, |
|
"loss": 0.5909, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.6066379547119141, |
|
"eval_runtime": 83.5749, |
|
"eval_samples_per_second": 23.931, |
|
"eval_steps_per_second": 2.991, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.008599838753023e-05, |
|
"loss": 0.5893, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.9763504434291855e-05, |
|
"loss": 0.5891, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.9441010481053475e-05, |
|
"loss": 0.5898, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.91185165278151e-05, |
|
"loss": 0.5977, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.879602257457673e-05, |
|
"loss": 0.5899, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.847352862133835e-05, |
|
"loss": 0.5891, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8151034668099976e-05, |
|
"loss": 0.5919, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.782854071486159e-05, |
|
"loss": 0.5876, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7506046761623216e-05, |
|
"loss": 0.5874, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.718355280838484e-05, |
|
"loss": 0.5914, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.6065989136695862, |
|
"eval_runtime": 83.5367, |
|
"eval_samples_per_second": 23.942, |
|
"eval_steps_per_second": 2.993, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.686105885514646e-05, |
|
"loss": 0.587, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.653856490190809e-05, |
|
"loss": 0.5924, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.621607094866971e-05, |
|
"loss": 0.5929, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.589357699543133e-05, |
|
"loss": 0.5938, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.5571083042192957e-05, |
|
"loss": 0.5943, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.524858908895458e-05, |
|
"loss": 0.592, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.4926095135716204e-05, |
|
"loss": 0.5904, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.460360118247783e-05, |
|
"loss": 0.5869, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.428110722923945e-05, |
|
"loss": 0.5874, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.395861327600107e-05, |
|
"loss": 0.586, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.6066107153892517, |
|
"eval_runtime": 83.5674, |
|
"eval_samples_per_second": 23.933, |
|
"eval_steps_per_second": 2.992, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.36361193227627e-05, |
|
"loss": 0.5901, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.331362536952432e-05, |
|
"loss": 0.5897, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.2991131416285944e-05, |
|
"loss": 0.5896, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2668637463047564e-05, |
|
"loss": 0.5884, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2346143509809185e-05, |
|
"loss": 0.5917, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.202364955657081e-05, |
|
"loss": 0.5888, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.170115560333243e-05, |
|
"loss": 0.589, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.137866165009406e-05, |
|
"loss": 0.5881, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1056167696855685e-05, |
|
"loss": 0.5909, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0733673743617305e-05, |
|
"loss": 0.587, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 0.6065407991409302, |
|
"eval_runtime": 83.5297, |
|
"eval_samples_per_second": 23.944, |
|
"eval_steps_per_second": 2.993, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.041117979037893e-05, |
|
"loss": 0.5947, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0088685837140552e-05, |
|
"loss": 0.5894, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9766191883902176e-05, |
|
"loss": 0.5938, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9443697930663796e-05, |
|
"loss": 0.5852, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.912120397742542e-05, |
|
"loss": 0.5893, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.8798710024187043e-05, |
|
"loss": 0.5915, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8476216070948666e-05, |
|
"loss": 0.5958, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8153722117710293e-05, |
|
"loss": 0.5919, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.7831228164471916e-05, |
|
"loss": 0.5998, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7508734211233537e-05, |
|
"loss": 0.5979, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 0.6064393520355225, |
|
"eval_runtime": 83.7117, |
|
"eval_samples_per_second": 23.892, |
|
"eval_steps_per_second": 2.986, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.718624025799516e-05, |
|
"loss": 0.5881, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.6863746304756784e-05, |
|
"loss": 0.5822, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6541252351518407e-05, |
|
"loss": 0.5921, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.621875839828003e-05, |
|
"loss": 0.5932, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.589626444504165e-05, |
|
"loss": 0.5988, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5573770491803274e-05, |
|
"loss": 0.5928, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5251276538564897e-05, |
|
"loss": 0.5899, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.4928782585326524e-05, |
|
"loss": 0.5852, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4606288632088148e-05, |
|
"loss": 0.5918, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.428379467884977e-05, |
|
"loss": 0.5918, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.6064170002937317, |
|
"eval_runtime": 83.4786, |
|
"eval_samples_per_second": 23.958, |
|
"eval_steps_per_second": 2.995, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.3961300725611395e-05, |
|
"loss": 0.5917, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.3638806772373015e-05, |
|
"loss": 0.5863, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3316312819134638e-05, |
|
"loss": 0.5916, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2993818865896262e-05, |
|
"loss": 0.5952, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2671324912657885e-05, |
|
"loss": 0.5958, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.234883095941951e-05, |
|
"loss": 0.5896, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2026337006181136e-05, |
|
"loss": 0.5897, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1703843052942756e-05, |
|
"loss": 0.594, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.138134909970438e-05, |
|
"loss": 0.5931, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1058855146466003e-05, |
|
"loss": 0.5875, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 0.6064504384994507, |
|
"eval_runtime": 83.4439, |
|
"eval_samples_per_second": 23.968, |
|
"eval_steps_per_second": 2.996, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.0736361193227626e-05, |
|
"loss": 0.5886, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.041386723998925e-05, |
|
"loss": 0.5906, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.009137328675087e-05, |
|
"loss": 0.5915, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9768879333512493e-05, |
|
"loss": 0.5879, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9446385380274117e-05, |
|
"loss": 0.5863, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.912389142703574e-05, |
|
"loss": 0.5878, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8801397473797367e-05, |
|
"loss": 0.5872, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8478903520558987e-05, |
|
"loss": 0.5883, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.815640956732061e-05, |
|
"loss": 0.5918, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7833915614082234e-05, |
|
"loss": 0.591, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 0.6063485145568848, |
|
"eval_runtime": 83.6891, |
|
"eval_samples_per_second": 23.898, |
|
"eval_steps_per_second": 2.987, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7511421660843857e-05, |
|
"loss": 0.5927, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.718892770760548e-05, |
|
"loss": 0.5865, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6866433754367104e-05, |
|
"loss": 0.5911, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6543939801128728e-05, |
|
"loss": 0.5873, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.622144584789035e-05, |
|
"loss": 0.5907, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5898951894651975e-05, |
|
"loss": 0.5877, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5576457941413598e-05, |
|
"loss": 0.5924, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.525396398817522e-05, |
|
"loss": 0.5907, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4931470034936845e-05, |
|
"loss": 0.5935, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4608976081698467e-05, |
|
"loss": 0.5894, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 0.606357216835022, |
|
"eval_runtime": 83.4836, |
|
"eval_samples_per_second": 23.957, |
|
"eval_steps_per_second": 2.995, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.428648212846009e-05, |
|
"loss": 0.5899, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.3963988175221714e-05, |
|
"loss": 0.5959, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.3641494221983336e-05, |
|
"loss": 0.5887, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.331900026874496e-05, |
|
"loss": 0.5888, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2996506315506584e-05, |
|
"loss": 0.5921, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2674012362268206e-05, |
|
"loss": 0.5899, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.235151840902983e-05, |
|
"loss": 0.5967, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2029024455791451e-05, |
|
"loss": 0.5932, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1706530502553076e-05, |
|
"loss": 0.5985, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.13840365493147e-05, |
|
"loss": 0.5944, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.6062895655632019, |
|
"eval_runtime": 83.4829, |
|
"eval_samples_per_second": 23.957, |
|
"eval_steps_per_second": 2.995, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1061542596076323e-05, |
|
"loss": 0.5942, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0739048642837945e-05, |
|
"loss": 0.5814, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0416554689599569e-05, |
|
"loss": 0.5896, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0094060736361194e-05, |
|
"loss": 0.5958, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.771566783122816e-06, |
|
"loss": 0.5828, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.449072829884439e-06, |
|
"loss": 0.5978, |
|
"step": 18120 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.126578876646061e-06, |
|
"loss": 0.595, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.804084923407686e-06, |
|
"loss": 0.5966, |
|
"step": 18160 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.481590970169308e-06, |
|
"loss": 0.5868, |
|
"step": 18180 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.159097016930933e-06, |
|
"loss": 0.5894, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 0.6062944531440735, |
|
"eval_runtime": 83.3214, |
|
"eval_samples_per_second": 24.003, |
|
"eval_steps_per_second": 3.0, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.836603063692555e-06, |
|
"loss": 0.588, |
|
"step": 18220 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.514109110454178e-06, |
|
"loss": 0.5939, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.191615157215802e-06, |
|
"loss": 0.596, |
|
"step": 18260 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.869121203977424e-06, |
|
"loss": 0.5909, |
|
"step": 18280 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.546627250739049e-06, |
|
"loss": 0.5938, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.224133297500671e-06, |
|
"loss": 0.5887, |
|
"step": 18320 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.901639344262295e-06, |
|
"loss": 0.586, |
|
"step": 18340 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.579145391023918e-06, |
|
"loss": 0.5888, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.256651437785541e-06, |
|
"loss": 0.5922, |
|
"step": 18380 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.934157484547164e-06, |
|
"loss": 0.5878, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 0.6062828302383423, |
|
"eval_runtime": 83.3962, |
|
"eval_samples_per_second": 23.982, |
|
"eval_steps_per_second": 2.998, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.611663531308788e-06, |
|
"loss": 0.5886, |
|
"step": 18420 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.28916957807041e-06, |
|
"loss": 0.5889, |
|
"step": 18440 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.966675624832034e-06, |
|
"loss": 0.5948, |
|
"step": 18460 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.6441816715936573e-06, |
|
"loss": 0.5896, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.321687718355281e-06, |
|
"loss": 0.5901, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.9991937651169034e-06, |
|
"loss": 0.5917, |
|
"step": 18520 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.676699811878527e-06, |
|
"loss": 0.5878, |
|
"step": 18540 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.3542058586401504e-06, |
|
"loss": 0.5918, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.031711905401774e-06, |
|
"loss": 0.5857, |
|
"step": 18580 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.7092179521633967e-06, |
|
"loss": 0.5928, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 0.60627281665802, |
|
"eval_runtime": 83.148, |
|
"eval_samples_per_second": 24.054, |
|
"eval_steps_per_second": 3.007, |
|
"step": 18600 |
|
} |
|
], |
|
"max_steps": 18705, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.4177730269248225e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|