|
{ |
|
"best_metric": 0.7255927324295044, |
|
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-belle-7b/checkpoint-12200", |
|
"epoch": 2.8847574852954216, |
|
"global_step": 12200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.8908, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.5545, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 1.1252, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 1.054, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0003, |
|
"loss": 1.0137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002995233177087471, |
|
"loss": 1.0046, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002990466354174942, |
|
"loss": 0.9867, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029856995312624134, |
|
"loss": 0.9612, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002980932708349884, |
|
"loss": 0.9588, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002976165885437356, |
|
"loss": 0.9551, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.9459459185600281, |
|
"eval_runtime": 19.4211, |
|
"eval_samples_per_second": 102.981, |
|
"eval_steps_per_second": 3.244, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002971399062524827, |
|
"loss": 0.9516, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029666322396122984, |
|
"loss": 0.937, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002961865416699769, |
|
"loss": 0.936, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029570985937872406, |
|
"loss": 0.9305, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029523317708747115, |
|
"loss": 0.9146, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002947564947962183, |
|
"loss": 0.9226, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029427981250496543, |
|
"loss": 0.9108, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002938031302137125, |
|
"loss": 0.9129, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029332644792245965, |
|
"loss": 0.9063, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002928497656312068, |
|
"loss": 0.8996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.9003962874412537, |
|
"eval_runtime": 19.5716, |
|
"eval_samples_per_second": 102.189, |
|
"eval_steps_per_second": 3.219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029237308333995393, |
|
"loss": 0.898, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000291896401048701, |
|
"loss": 0.8936, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029141971875744815, |
|
"loss": 0.8932, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029094303646619524, |
|
"loss": 0.8779, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002904663541749424, |
|
"loss": 0.8871, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00028998967188368946, |
|
"loss": 0.8929, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002895129895924366, |
|
"loss": 0.8878, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028903630730118374, |
|
"loss": 0.8818, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028855962500993083, |
|
"loss": 0.8826, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028808294271867797, |
|
"loss": 0.879, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.8738257884979248, |
|
"eval_runtime": 19.3526, |
|
"eval_samples_per_second": 103.345, |
|
"eval_steps_per_second": 3.255, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002876062604274251, |
|
"loss": 0.87, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028712957813617224, |
|
"loss": 0.8715, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028665289584491933, |
|
"loss": 0.8724, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028617621355366647, |
|
"loss": 0.8741, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028569953126241355, |
|
"loss": 0.8705, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002852228489711607, |
|
"loss": 0.8702, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028474616667990783, |
|
"loss": 0.8618, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002842694843886549, |
|
"loss": 0.8617, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028379280209740206, |
|
"loss": 0.8677, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002833161198061492, |
|
"loss": 0.853, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.8541846871376038, |
|
"eval_runtime": 19.49, |
|
"eval_samples_per_second": 102.617, |
|
"eval_steps_per_second": 3.232, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002828394375148963, |
|
"loss": 0.8549, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002823627552236434, |
|
"loss": 0.847, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028188607293239056, |
|
"loss": 0.8585, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028140939064113764, |
|
"loss": 0.8568, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002809327083498848, |
|
"loss": 0.8482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028045602605863187, |
|
"loss": 0.845, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000279979343767379, |
|
"loss": 0.8548, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027950266147612615, |
|
"loss": 0.8372, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002790259791848733, |
|
"loss": 0.8423, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027854929689362037, |
|
"loss": 0.8433, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.8396860361099243, |
|
"eval_runtime": 19.3459, |
|
"eval_samples_per_second": 103.381, |
|
"eval_steps_per_second": 3.257, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002780726146023675, |
|
"loss": 0.848, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002775959323111146, |
|
"loss": 0.8383, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00027711925001986173, |
|
"loss": 0.8385, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002766425677286089, |
|
"loss": 0.8308, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027616588543735596, |
|
"loss": 0.8244, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002756892031461031, |
|
"loss": 0.835, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002752125208548502, |
|
"loss": 0.8337, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002747358385635974, |
|
"loss": 0.8348, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027425915627234446, |
|
"loss": 0.8353, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002737824739810916, |
|
"loss": 0.8294, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.8274422287940979, |
|
"eval_runtime": 19.4187, |
|
"eval_samples_per_second": 102.993, |
|
"eval_steps_per_second": 3.244, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002733057916898387, |
|
"loss": 0.8337, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002728291093985858, |
|
"loss": 0.8435, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027235242710733296, |
|
"loss": 0.8347, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027187574481608005, |
|
"loss": 0.8258, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002713990625248272, |
|
"loss": 0.8304, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002709223802335743, |
|
"loss": 0.8264, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002704456979423214, |
|
"loss": 0.8313, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00026996901565106855, |
|
"loss": 0.814, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002694923333598157, |
|
"loss": 0.8223, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002690156510685628, |
|
"loss": 0.8159, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 0.8179089426994324, |
|
"eval_runtime": 19.4736, |
|
"eval_samples_per_second": 102.703, |
|
"eval_steps_per_second": 3.235, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002685389687773099, |
|
"loss": 0.8218, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000268062286486057, |
|
"loss": 0.808, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026758560419480414, |
|
"loss": 0.8253, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002671089219035513, |
|
"loss": 0.8174, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026663223961229836, |
|
"loss": 0.8157, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002661555573210455, |
|
"loss": 0.8142, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002656788750297926, |
|
"loss": 0.8112, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026520219273853973, |
|
"loss": 0.8232, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026472551044728687, |
|
"loss": 0.8254, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000264248828156034, |
|
"loss": 0.8059, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.8101135492324829, |
|
"eval_runtime": 19.5846, |
|
"eval_samples_per_second": 102.121, |
|
"eval_steps_per_second": 3.217, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002637721458647811, |
|
"loss": 0.8062, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026329546357352823, |
|
"loss": 0.805, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002628187812822753, |
|
"loss": 0.8109, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026234209899102245, |
|
"loss": 0.801, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002618654166997696, |
|
"loss": 0.8043, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002613887344085167, |
|
"loss": 0.8002, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002609120521172638, |
|
"loss": 0.8152, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00026043536982601096, |
|
"loss": 0.8052, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002599586875347581, |
|
"loss": 0.8136, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002594820052435052, |
|
"loss": 0.8044, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.8030326962471008, |
|
"eval_runtime": 19.4835, |
|
"eval_samples_per_second": 102.651, |
|
"eval_steps_per_second": 3.234, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002590053229522523, |
|
"loss": 0.7995, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002585286406609994, |
|
"loss": 0.7958, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025805195836974654, |
|
"loss": 0.8034, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025757527607849363, |
|
"loss": 0.8016, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025709859378724077, |
|
"loss": 0.8048, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002566219114959879, |
|
"loss": 0.8004, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025614522920473505, |
|
"loss": 0.8041, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025566854691348213, |
|
"loss": 0.7908, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025519186462222927, |
|
"loss": 0.7958, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002547151823309764, |
|
"loss": 0.8013, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.7965430021286011, |
|
"eval_runtime": 19.4852, |
|
"eval_samples_per_second": 102.642, |
|
"eval_steps_per_second": 3.233, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002542385000397235, |
|
"loss": 0.803, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025376181774847064, |
|
"loss": 0.7966, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002532851354572177, |
|
"loss": 0.7946, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00025280845316596486, |
|
"loss": 0.8023, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025233177087471194, |
|
"loss": 0.7953, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002518550885834591, |
|
"loss": 0.8053, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002513784062922062, |
|
"loss": 0.7883, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00025090172400095336, |
|
"loss": 0.7984, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00025042504170970045, |
|
"loss": 0.7962, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002499483594184476, |
|
"loss": 0.7847, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.7915623784065247, |
|
"eval_runtime": 19.5509, |
|
"eval_samples_per_second": 102.297, |
|
"eval_steps_per_second": 3.222, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002494716771271947, |
|
"loss": 0.7917, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002489949948359418, |
|
"loss": 0.7942, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024851831254468895, |
|
"loss": 0.7921, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024804163025343603, |
|
"loss": 0.7971, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002475649479621832, |
|
"loss": 0.7919, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002470882656709303, |
|
"loss": 0.7917, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024661158337967745, |
|
"loss": 0.8024, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024613490108842454, |
|
"loss": 0.7761, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002456582187971717, |
|
"loss": 0.7958, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024518153650591876, |
|
"loss": 0.7855, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.7870249152183533, |
|
"eval_runtime": 19.5953, |
|
"eval_samples_per_second": 102.065, |
|
"eval_steps_per_second": 3.215, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002447048542146659, |
|
"loss": 0.784, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024422817192341304, |
|
"loss": 0.7926, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024375148963216013, |
|
"loss": 0.7845, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024327480734090726, |
|
"loss": 0.782, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024279812504965438, |
|
"loss": 0.7808, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024232144275840152, |
|
"loss": 0.7926, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024184476046714863, |
|
"loss": 0.7795, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024136807817589574, |
|
"loss": 0.7888, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024089139588464288, |
|
"loss": 0.7888, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024041471359339, |
|
"loss": 0.7863, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.7825512290000916, |
|
"eval_runtime": 19.4274, |
|
"eval_samples_per_second": 102.948, |
|
"eval_steps_per_second": 3.243, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002399380313021371, |
|
"loss": 0.7881, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023946134901088422, |
|
"loss": 0.7841, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023898466671963133, |
|
"loss": 0.7849, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023850798442837844, |
|
"loss": 0.7809, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002380313021371256, |
|
"loss": 0.7757, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023755461984587272, |
|
"loss": 0.7787, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023707793755461983, |
|
"loss": 0.7766, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023660125526336694, |
|
"loss": 0.7867, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023612457297211405, |
|
"loss": 0.7767, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002356478906808612, |
|
"loss": 0.7806, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.7781409621238708, |
|
"eval_runtime": 20.131, |
|
"eval_samples_per_second": 99.349, |
|
"eval_steps_per_second": 3.13, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002351712083896083, |
|
"loss": 0.7774, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023469452609835542, |
|
"loss": 0.7782, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023421784380710253, |
|
"loss": 0.7773, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023374116151584964, |
|
"loss": 0.7845, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002332644792245968, |
|
"loss": 0.7879, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023278779693334392, |
|
"loss": 0.7801, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023231111464209103, |
|
"loss": 0.7713, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023183443235083814, |
|
"loss": 0.7742, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023135775005958526, |
|
"loss": 0.7783, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002308810677683324, |
|
"loss": 0.7698, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.7747411131858826, |
|
"eval_runtime": 20.0968, |
|
"eval_samples_per_second": 99.519, |
|
"eval_steps_per_second": 3.135, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002304043854770795, |
|
"loss": 0.7696, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022992770318582662, |
|
"loss": 0.7744, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022945102089457373, |
|
"loss": 0.7687, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022897433860332084, |
|
"loss": 0.7765, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.000228497656312068, |
|
"loss": 0.7709, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022802097402081512, |
|
"loss": 0.773, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022754429172956224, |
|
"loss": 0.7862, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022706760943830935, |
|
"loss": 0.7668, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022659092714705646, |
|
"loss": 0.7816, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022611424485580357, |
|
"loss": 0.7831, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.7719215154647827, |
|
"eval_runtime": 19.6387, |
|
"eval_samples_per_second": 101.84, |
|
"eval_steps_per_second": 3.208, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002256375625645507, |
|
"loss": 0.7723, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022516088027329782, |
|
"loss": 0.7727, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022468419798204493, |
|
"loss": 0.7719, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022420751569079207, |
|
"loss": 0.7796, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002237308333995392, |
|
"loss": 0.7685, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022325415110828633, |
|
"loss": 0.7725, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022277746881703344, |
|
"loss": 0.7638, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022230078652578055, |
|
"loss": 0.7771, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022182410423452766, |
|
"loss": 0.7689, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022134742194327477, |
|
"loss": 0.7797, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.768983006477356, |
|
"eval_runtime": 19.4428, |
|
"eval_samples_per_second": 102.866, |
|
"eval_steps_per_second": 3.24, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002208707396520219, |
|
"loss": 0.7734, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022039405736076903, |
|
"loss": 0.7719, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021991737506951614, |
|
"loss": 0.767, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021944069277826328, |
|
"loss": 0.7758, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002189640104870104, |
|
"loss": 0.7768, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021848732819575753, |
|
"loss": 0.7641, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021801064590450464, |
|
"loss": 0.7694, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021753396361325175, |
|
"loss": 0.7835, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021705728132199886, |
|
"loss": 0.7642, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021658059903074598, |
|
"loss": 0.7719, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.7660636305809021, |
|
"eval_runtime": 19.5996, |
|
"eval_samples_per_second": 102.043, |
|
"eval_steps_per_second": 3.214, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002161039167394931, |
|
"loss": 0.7723, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021562723444824023, |
|
"loss": 0.76, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021515055215698734, |
|
"loss": 0.7643, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021467386986573448, |
|
"loss": 0.7599, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002141971875744816, |
|
"loss": 0.7623, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002137205052832287, |
|
"loss": 0.7621, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021324382299197584, |
|
"loss": 0.7691, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021276714070072295, |
|
"loss": 0.7665, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021229045840947007, |
|
"loss": 0.7742, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021181377611821718, |
|
"loss": 0.7624, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.7643172740936279, |
|
"eval_runtime": 19.487, |
|
"eval_samples_per_second": 102.633, |
|
"eval_steps_per_second": 3.233, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002113370938269643, |
|
"loss": 0.7726, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002108604115357114, |
|
"loss": 0.7559, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021038372924445857, |
|
"loss": 0.7634, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020990704695320568, |
|
"loss": 0.765, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002094303646619528, |
|
"loss": 0.7649, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002089536823706999, |
|
"loss": 0.763, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020847700007944705, |
|
"loss": 0.7679, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020800031778819416, |
|
"loss": 0.7644, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020752363549694127, |
|
"loss": 0.7655, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020704695320568838, |
|
"loss": 0.7681, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.7610963582992554, |
|
"eval_runtime": 19.5269, |
|
"eval_samples_per_second": 102.423, |
|
"eval_steps_per_second": 3.226, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002065702709144355, |
|
"loss": 0.7623, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002060935886231826, |
|
"loss": 0.7625, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020561690633192977, |
|
"loss": 0.7524, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020514022404067688, |
|
"loss": 0.764, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.000204663541749424, |
|
"loss": 0.7513, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002041868594581711, |
|
"loss": 0.753, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020371017716691822, |
|
"loss": 0.7602, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020323349487566536, |
|
"loss": 0.7701, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020275681258441247, |
|
"loss": 0.7602, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020228013029315958, |
|
"loss": 0.7598, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.760128915309906, |
|
"eval_runtime": 19.4387, |
|
"eval_samples_per_second": 102.888, |
|
"eval_steps_per_second": 3.241, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002018034480019067, |
|
"loss": 0.7579, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020132676571065384, |
|
"loss": 0.7628, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020085008341940097, |
|
"loss": 0.7551, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002003734011281481, |
|
"loss": 0.7582, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001998967188368952, |
|
"loss": 0.7623, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001994200365456423, |
|
"loss": 0.7504, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019894335425438942, |
|
"loss": 0.7587, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019846667196313654, |
|
"loss": 0.7528, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019798998967188367, |
|
"loss": 0.754, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019751330738063079, |
|
"loss": 0.759, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.7575392127037048, |
|
"eval_runtime": 19.5275, |
|
"eval_samples_per_second": 102.42, |
|
"eval_steps_per_second": 3.226, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001970366250893779, |
|
"loss": 0.7592, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019655994279812504, |
|
"loss": 0.7548, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019608326050687218, |
|
"loss": 0.7632, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001956065782156193, |
|
"loss": 0.7472, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001951298959243664, |
|
"loss": 0.7496, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001946532136331135, |
|
"loss": 0.7549, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019417653134186063, |
|
"loss": 0.77, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019369984905060774, |
|
"loss": 0.759, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019322316675935488, |
|
"loss": 0.7554, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000192746484468102, |
|
"loss": 0.7577, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.7568497061729431, |
|
"eval_runtime": 19.53, |
|
"eval_samples_per_second": 102.406, |
|
"eval_steps_per_second": 3.226, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001922698021768491, |
|
"loss": 0.7617, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019179311988559624, |
|
"loss": 0.7551, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019131643759434335, |
|
"loss": 0.7482, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001908397553030905, |
|
"loss": 0.7516, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0001903630730118376, |
|
"loss": 0.7555, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018988639072058472, |
|
"loss": 0.7605, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018940970842933183, |
|
"loss": 0.7506, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018893302613807894, |
|
"loss": 0.7622, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018845634384682605, |
|
"loss": 0.75, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001879796615555732, |
|
"loss": 0.7572, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 0.7548028826713562, |
|
"eval_runtime": 19.5411, |
|
"eval_samples_per_second": 102.349, |
|
"eval_steps_per_second": 3.224, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018750297926432033, |
|
"loss": 0.7427, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018702629697306744, |
|
"loss": 0.7489, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018654961468181455, |
|
"loss": 0.755, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018607293239056167, |
|
"loss": 0.7517, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001855962500993088, |
|
"loss": 0.7529, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018511956780805592, |
|
"loss": 0.7498, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018464288551680303, |
|
"loss": 0.756, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018416620322555014, |
|
"loss": 0.7492, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018368952093429725, |
|
"loss": 0.7491, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018321283864304437, |
|
"loss": 0.7585, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 0.7538104057312012, |
|
"eval_runtime": 19.6106, |
|
"eval_samples_per_second": 101.986, |
|
"eval_steps_per_second": 3.213, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018273615635179153, |
|
"loss": 0.7531, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018225947406053865, |
|
"loss": 0.7511, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018178279176928576, |
|
"loss": 0.7541, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018130610947803287, |
|
"loss": 0.7465, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018082942718678, |
|
"loss": 0.7403, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018035274489552712, |
|
"loss": 0.749, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017987606260427423, |
|
"loss": 0.7548, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017939938031302134, |
|
"loss": 0.7443, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017892269802176846, |
|
"loss": 0.7461, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017844601573051557, |
|
"loss": 0.7511, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.7509217262268066, |
|
"eval_runtime": 19.5437, |
|
"eval_samples_per_second": 102.335, |
|
"eval_steps_per_second": 3.224, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017796933343926274, |
|
"loss": 0.7562, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017749265114800985, |
|
"loss": 0.7489, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017701596885675696, |
|
"loss": 0.7499, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017653928656550407, |
|
"loss": 0.7519, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017606260427425118, |
|
"loss": 0.7536, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017558592198299832, |
|
"loss": 0.7536, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017510923969174544, |
|
"loss": 0.7492, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017463255740049255, |
|
"loss": 0.7454, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017415587510923966, |
|
"loss": 0.7528, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001736791928179868, |
|
"loss": 0.7409, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.7497395873069763, |
|
"eval_runtime": 19.5671, |
|
"eval_samples_per_second": 102.212, |
|
"eval_steps_per_second": 3.22, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017320251052673394, |
|
"loss": 0.7434, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017272582823548105, |
|
"loss": 0.7543, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017224914594422816, |
|
"loss": 0.7457, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017177246365297527, |
|
"loss": 0.7439, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001712957813617224, |
|
"loss": 0.7412, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001708190990704695, |
|
"loss": 0.7409, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017034241677921664, |
|
"loss": 0.7473, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016986573448796375, |
|
"loss": 0.7486, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016938905219671086, |
|
"loss": 0.7439, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.000168912369905458, |
|
"loss": 0.7524, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.7480019330978394, |
|
"eval_runtime": 19.5018, |
|
"eval_samples_per_second": 102.555, |
|
"eval_steps_per_second": 3.23, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016843568761420514, |
|
"loss": 0.7464, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016795900532295225, |
|
"loss": 0.7511, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016748232303169936, |
|
"loss": 0.7423, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016700564074044648, |
|
"loss": 0.7422, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001665289584491936, |
|
"loss": 0.742, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001660522761579407, |
|
"loss": 0.7421, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016557559386668784, |
|
"loss": 0.749, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016509891157543495, |
|
"loss": 0.7432, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001646222292841821, |
|
"loss": 0.7426, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001641455469929292, |
|
"loss": 0.7543, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.7470090389251709, |
|
"eval_runtime": 19.5563, |
|
"eval_samples_per_second": 102.269, |
|
"eval_steps_per_second": 3.221, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016366886470167632, |
|
"loss": 0.7451, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016319218241042346, |
|
"loss": 0.7481, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016271550011917057, |
|
"loss": 0.7381, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016223881782791768, |
|
"loss": 0.7461, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001617621355366648, |
|
"loss": 0.7467, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001612854532454119, |
|
"loss": 0.745, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016080877095415902, |
|
"loss": 0.745, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016033208866290615, |
|
"loss": 0.7386, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001598554063716533, |
|
"loss": 0.7363, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001593787240804004, |
|
"loss": 0.7412, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 0.7454522848129272, |
|
"eval_runtime": 19.555, |
|
"eval_samples_per_second": 102.276, |
|
"eval_steps_per_second": 3.222, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015890204178914752, |
|
"loss": 0.7501, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015842535949789463, |
|
"loss": 0.7528, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015794867720664177, |
|
"loss": 0.7373, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015747199491538888, |
|
"loss": 0.7451, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.000156995312624136, |
|
"loss": 0.7384, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001565186303328831, |
|
"loss": 0.7471, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015604194804163022, |
|
"loss": 0.7454, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015556526575037733, |
|
"loss": 0.7415, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001550885834591245, |
|
"loss": 0.7514, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001546119011678716, |
|
"loss": 0.7343, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.7457332611083984, |
|
"eval_runtime": 19.5673, |
|
"eval_samples_per_second": 102.212, |
|
"eval_steps_per_second": 3.22, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015413521887661872, |
|
"loss": 0.7452, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015365853658536583, |
|
"loss": 0.7456, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015318185429411297, |
|
"loss": 0.7326, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015270517200286008, |
|
"loss": 0.7431, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001522284897116072, |
|
"loss": 0.7419, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001517518074203543, |
|
"loss": 0.7375, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015127512512910142, |
|
"loss": 0.7419, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001507984428378486, |
|
"loss": 0.7431, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001503217605465957, |
|
"loss": 0.7412, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014984507825534278, |
|
"loss": 0.7447, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.7441338896751404, |
|
"eval_runtime": 19.4509, |
|
"eval_samples_per_second": 102.823, |
|
"eval_steps_per_second": 3.239, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014936839596408992, |
|
"loss": 0.7436, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014889171367283704, |
|
"loss": 0.7402, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014841503138158415, |
|
"loss": 0.7454, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001479383490903313, |
|
"loss": 0.738, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001474616667990784, |
|
"loss": 0.7396, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014698498450782554, |
|
"loss": 0.7333, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014650830221657265, |
|
"loss": 0.7482, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014603161992531976, |
|
"loss": 0.7376, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014555493763406687, |
|
"loss": 0.7369, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014507825534281401, |
|
"loss": 0.7347, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.7425362467765808, |
|
"eval_runtime": 19.5248, |
|
"eval_samples_per_second": 102.434, |
|
"eval_steps_per_second": 3.227, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014460157305156113, |
|
"loss": 0.7446, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014412489076030824, |
|
"loss": 0.7343, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014364820846905535, |
|
"loss": 0.7468, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001431715261778025, |
|
"loss": 0.749, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001426948438865496, |
|
"loss": 0.7401, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001422181615952967, |
|
"loss": 0.7364, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014174147930404385, |
|
"loss": 0.7442, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014126479701279096, |
|
"loss": 0.7385, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014078811472153808, |
|
"loss": 0.7412, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00014031143243028522, |
|
"loss": 0.7377, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 0.7418386936187744, |
|
"eval_runtime": 19.5679, |
|
"eval_samples_per_second": 102.208, |
|
"eval_steps_per_second": 3.22, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013983475013903233, |
|
"loss": 0.7432, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013935806784777944, |
|
"loss": 0.7379, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013888138555652655, |
|
"loss": 0.7346, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013840470326527366, |
|
"loss": 0.7373, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001379280209740208, |
|
"loss": 0.7403, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013745133868276792, |
|
"loss": 0.7477, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013697465639151506, |
|
"loss": 0.7343, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013649797410026217, |
|
"loss": 0.7419, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013602129180900928, |
|
"loss": 0.7327, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013554460951775642, |
|
"loss": 0.7398, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.7402775883674622, |
|
"eval_runtime": 19.5554, |
|
"eval_samples_per_second": 102.274, |
|
"eval_steps_per_second": 3.222, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013506792722650353, |
|
"loss": 0.7311, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013459124493525064, |
|
"loss": 0.7319, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013411456264399775, |
|
"loss": 0.7315, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001336378803527449, |
|
"loss": 0.7329, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.000133161198061492, |
|
"loss": 0.7471, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013268451577023912, |
|
"loss": 0.7446, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013220783347898623, |
|
"loss": 0.7359, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013173115118773337, |
|
"loss": 0.7348, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013125446889648048, |
|
"loss": 0.7331, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013077778660522762, |
|
"loss": 0.7385, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.7401012182235718, |
|
"eval_runtime": 19.7831, |
|
"eval_samples_per_second": 101.096, |
|
"eval_steps_per_second": 3.185, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013030110431397473, |
|
"loss": 0.744, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012982442202272185, |
|
"loss": 0.7327, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012934773973146896, |
|
"loss": 0.7384, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001288710574402161, |
|
"loss": 0.7399, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001283943751489632, |
|
"loss": 0.7376, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012791769285771032, |
|
"loss": 0.7416, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012744101056645743, |
|
"loss": 0.7299, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012696432827520455, |
|
"loss": 0.7389, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012648764598395168, |
|
"loss": 0.7295, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001260109636926988, |
|
"loss": 0.7389, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.7385362386703491, |
|
"eval_runtime": 19.6728, |
|
"eval_samples_per_second": 101.663, |
|
"eval_steps_per_second": 3.202, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012553428140144594, |
|
"loss": 0.7346, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012505759911019305, |
|
"loss": 0.7357, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012458091681894016, |
|
"loss": 0.7295, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001241042345276873, |
|
"loss": 0.7418, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001236275522364344, |
|
"loss": 0.7248, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012315086994518152, |
|
"loss": 0.7326, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012267418765392864, |
|
"loss": 0.7422, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012219750536267577, |
|
"loss": 0.7376, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012172082307142289, |
|
"loss": 0.7358, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012124414078017001, |
|
"loss": 0.7337, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.737734854221344, |
|
"eval_runtime": 19.8317, |
|
"eval_samples_per_second": 100.849, |
|
"eval_steps_per_second": 3.177, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012076745848891712, |
|
"loss": 0.7318, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012029077619766424, |
|
"loss": 0.7356, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011981409390641138, |
|
"loss": 0.7355, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011933741161515849, |
|
"loss": 0.74, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001188607293239056, |
|
"loss": 0.7342, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011838404703265273, |
|
"loss": 0.7368, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011790736474139984, |
|
"loss": 0.7337, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011743068245014698, |
|
"loss": 0.7317, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011695400015889409, |
|
"loss": 0.738, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001164773178676412, |
|
"loss": 0.7375, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.7366506457328796, |
|
"eval_runtime": 19.9586, |
|
"eval_samples_per_second": 100.208, |
|
"eval_steps_per_second": 3.157, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011600063557638833, |
|
"loss": 0.7349, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011552395328513544, |
|
"loss": 0.733, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011504727099388258, |
|
"loss": 0.7277, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011457058870262969, |
|
"loss": 0.7235, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001140939064113768, |
|
"loss": 0.7405, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011361722412012393, |
|
"loss": 0.7378, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011314054182887104, |
|
"loss": 0.7292, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011266385953761818, |
|
"loss": 0.7427, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011218717724636529, |
|
"loss": 0.7313, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001117104949551124, |
|
"loss": 0.7252, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.736083984375, |
|
"eval_runtime": 19.7958, |
|
"eval_samples_per_second": 101.031, |
|
"eval_steps_per_second": 3.182, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011123381266385953, |
|
"loss": 0.7268, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011075713037260666, |
|
"loss": 0.729, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00011028044808135377, |
|
"loss": 0.7358, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010980376579010089, |
|
"loss": 0.7408, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.000109327083498848, |
|
"loss": 0.73, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010887423532215777, |
|
"loss": 0.7298, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001083975530309049, |
|
"loss": 0.7324, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010792087073965201, |
|
"loss": 0.7296, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010744418844839912, |
|
"loss": 0.7346, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010696750615714626, |
|
"loss": 0.7281, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.7352190613746643, |
|
"eval_runtime": 19.6635, |
|
"eval_samples_per_second": 101.711, |
|
"eval_steps_per_second": 3.204, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010649082386589337, |
|
"loss": 0.7377, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001060141415746405, |
|
"loss": 0.7281, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010553745928338761, |
|
"loss": 0.7251, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010506077699213472, |
|
"loss": 0.7331, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010458409470088186, |
|
"loss": 0.7432, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010410741240962897, |
|
"loss": 0.7366, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001036307301183761, |
|
"loss": 0.7334, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010315404782712321, |
|
"loss": 0.7351, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010267736553587032, |
|
"loss": 0.7355, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010220068324461746, |
|
"loss": 0.7228, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.7341500520706177, |
|
"eval_runtime": 19.6196, |
|
"eval_samples_per_second": 101.939, |
|
"eval_steps_per_second": 3.211, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010172400095336457, |
|
"loss": 0.7451, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010124731866211169, |
|
"loss": 0.7356, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010077063637085881, |
|
"loss": 0.7255, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00010029395407960592, |
|
"loss": 0.7267, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.981727178835306e-05, |
|
"loss": 0.7291, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.934058949710018e-05, |
|
"loss": 0.7294, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.886390720584729e-05, |
|
"loss": 0.7377, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.838722491459441e-05, |
|
"loss": 0.7324, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.791054262334154e-05, |
|
"loss": 0.7286, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.743386033208867e-05, |
|
"loss": 0.7286, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.734474241733551, |
|
"eval_runtime": 19.5642, |
|
"eval_samples_per_second": 102.228, |
|
"eval_steps_per_second": 3.22, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.695717804083578e-05, |
|
"loss": 0.7304, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.648049574958289e-05, |
|
"loss": 0.7348, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.600381345833002e-05, |
|
"loss": 0.7261, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.552713116707714e-05, |
|
"loss": 0.7313, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.505044887582425e-05, |
|
"loss": 0.7379, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.457376658457138e-05, |
|
"loss": 0.7203, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.409708429331849e-05, |
|
"loss": 0.7306, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.36204020020656e-05, |
|
"loss": 0.7332, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.314371971081274e-05, |
|
"loss": 0.7228, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.266703741955985e-05, |
|
"loss": 0.731, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.7332338690757751, |
|
"eval_runtime": 19.7114, |
|
"eval_samples_per_second": 101.464, |
|
"eval_steps_per_second": 3.196, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.219035512830698e-05, |
|
"loss": 0.7267, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.171367283705409e-05, |
|
"loss": 0.7285, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.12369905458012e-05, |
|
"loss": 0.7214, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.076030825454834e-05, |
|
"loss": 0.7204, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.028362596329546e-05, |
|
"loss": 0.7253, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.980694367204258e-05, |
|
"loss": 0.7253, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.933026138078969e-05, |
|
"loss": 0.7238, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.88535790895368e-05, |
|
"loss": 0.7286, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.837689679828394e-05, |
|
"loss": 0.7385, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.790021450703106e-05, |
|
"loss": 0.7237, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.7329864501953125, |
|
"eval_runtime": 19.7024, |
|
"eval_samples_per_second": 101.51, |
|
"eval_steps_per_second": 3.198, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.742353221577817e-05, |
|
"loss": 0.7311, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.69468499245253e-05, |
|
"loss": 0.7374, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.64701676332724e-05, |
|
"loss": 0.7194, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.599348534201955e-05, |
|
"loss": 0.7237, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.551680305076666e-05, |
|
"loss": 0.7287, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.504012075951377e-05, |
|
"loss": 0.7385, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.45634384682609e-05, |
|
"loss": 0.7319, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.408675617700802e-05, |
|
"loss": 0.7278, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.361007388575515e-05, |
|
"loss": 0.7293, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.313339159450226e-05, |
|
"loss": 0.7232, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.7326176762580872, |
|
"eval_runtime": 20.1581, |
|
"eval_samples_per_second": 99.215, |
|
"eval_steps_per_second": 3.125, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.265670930324937e-05, |
|
"loss": 0.7281, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.21800270119965e-05, |
|
"loss": 0.728, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.170334472074362e-05, |
|
"loss": 0.728, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.122666242949073e-05, |
|
"loss": 0.7221, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.074998013823786e-05, |
|
"loss": 0.7242, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.027329784698497e-05, |
|
"loss": 0.7306, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.979661555573208e-05, |
|
"loss": 0.7218, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.931993326447922e-05, |
|
"loss": 0.7289, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.884325097322634e-05, |
|
"loss": 0.7177, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.836656868197346e-05, |
|
"loss": 0.7265, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.7311453819274902, |
|
"eval_runtime": 19.9076, |
|
"eval_samples_per_second": 100.464, |
|
"eval_steps_per_second": 3.165, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.788988639072057e-05, |
|
"loss": 0.7269, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.741320409946769e-05, |
|
"loss": 0.7275, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.693652180821483e-05, |
|
"loss": 0.7317, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.645983951696194e-05, |
|
"loss": 0.7344, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.598315722570906e-05, |
|
"loss": 0.7263, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.550647493445617e-05, |
|
"loss": 0.7299, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.502979264320329e-05, |
|
"loss": 0.724, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.455311035195041e-05, |
|
"loss": 0.7266, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.407642806069754e-05, |
|
"loss": 0.7299, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.359974576944465e-05, |
|
"loss": 0.7236, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.7311366200447083, |
|
"eval_runtime": 20.0053, |
|
"eval_samples_per_second": 99.973, |
|
"eval_steps_per_second": 3.149, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.314689759275442e-05, |
|
"loss": 0.7252, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.267021530150154e-05, |
|
"loss": 0.7252, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.219353301024865e-05, |
|
"loss": 0.7188, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.171685071899578e-05, |
|
"loss": 0.7243, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.12401684277429e-05, |
|
"loss": 0.7298, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.076348613649002e-05, |
|
"loss": 0.7325, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.028680384523714e-05, |
|
"loss": 0.7286, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.981012155398426e-05, |
|
"loss": 0.7201, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.933343926273138e-05, |
|
"loss": 0.7184, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.885675697147851e-05, |
|
"loss": 0.7291, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 0.7308618426322937, |
|
"eval_runtime": 19.7965, |
|
"eval_samples_per_second": 101.028, |
|
"eval_steps_per_second": 3.182, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.838007468022563e-05, |
|
"loss": 0.7318, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.790339238897274e-05, |
|
"loss": 0.7227, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.742671009771986e-05, |
|
"loss": 0.7377, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.695002780646698e-05, |
|
"loss": 0.7367, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.647334551521411e-05, |
|
"loss": 0.7218, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.599666322396122e-05, |
|
"loss": 0.7282, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.551998093270835e-05, |
|
"loss": 0.7231, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.504329864145546e-05, |
|
"loss": 0.7257, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.456661635020258e-05, |
|
"loss": 0.7275, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.40899340589497e-05, |
|
"loss": 0.725, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.7301817536354065, |
|
"eval_runtime": 19.7914, |
|
"eval_samples_per_second": 101.054, |
|
"eval_steps_per_second": 3.183, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.361325176769682e-05, |
|
"loss": 0.72, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.313656947644395e-05, |
|
"loss": 0.7267, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.265988718519107e-05, |
|
"loss": 0.7276, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.218320489393818e-05, |
|
"loss": 0.7262, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.17065226026853e-05, |
|
"loss": 0.7149, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.122984031143242e-05, |
|
"loss": 0.7305, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.075315802017954e-05, |
|
"loss": 0.7314, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.027647572892667e-05, |
|
"loss": 0.7154, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9799793437673786e-05, |
|
"loss": 0.7263, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.93231111464209e-05, |
|
"loss": 0.7203, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.7294782996177673, |
|
"eval_runtime": 19.7824, |
|
"eval_samples_per_second": 101.1, |
|
"eval_steps_per_second": 3.185, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8846428855168024e-05, |
|
"loss": 0.7208, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.836974656391514e-05, |
|
"loss": 0.7266, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.789306427266227e-05, |
|
"loss": 0.7285, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.741638198140939e-05, |
|
"loss": 0.7215, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6939699690156506e-05, |
|
"loss": 0.7203, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6463017398903625e-05, |
|
"loss": 0.7314, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.5986335107650744e-05, |
|
"loss": 0.7394, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.550965281639787e-05, |
|
"loss": 0.7138, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.503297052514498e-05, |
|
"loss": 0.721, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.455628823389211e-05, |
|
"loss": 0.7199, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 0.728507936000824, |
|
"eval_runtime": 19.7761, |
|
"eval_samples_per_second": 101.132, |
|
"eval_steps_per_second": 3.186, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.4079605942639226e-05, |
|
"loss": 0.7228, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3602923651386345e-05, |
|
"loss": 0.7193, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3126241360133464e-05, |
|
"loss": 0.7269, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.264955906888058e-05, |
|
"loss": 0.729, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.217287677762771e-05, |
|
"loss": 0.7193, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.169619448637483e-05, |
|
"loss": 0.7158, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.121951219512195e-05, |
|
"loss": 0.7158, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0742829903869065e-05, |
|
"loss": 0.7177, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0266147612616184e-05, |
|
"loss": 0.7187, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.978946532136331e-05, |
|
"loss": 0.7185, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.7283052802085876, |
|
"eval_runtime": 20.2682, |
|
"eval_samples_per_second": 98.677, |
|
"eval_steps_per_second": 3.108, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.931278303011042e-05, |
|
"loss": 0.7264, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.883610073885755e-05, |
|
"loss": 0.7208, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.835941844760467e-05, |
|
"loss": 0.7275, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7882736156351786e-05, |
|
"loss": 0.7205, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.740605386509891e-05, |
|
"loss": 0.7213, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.692937157384602e-05, |
|
"loss": 0.7324, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.645268928259315e-05, |
|
"loss": 0.7197, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.597600699134027e-05, |
|
"loss": 0.7162, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.5499324700087394e-05, |
|
"loss": 0.7223, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.5022642408834506e-05, |
|
"loss": 0.7249, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 0.7278863191604614, |
|
"eval_runtime": 19.7684, |
|
"eval_samples_per_second": 101.171, |
|
"eval_steps_per_second": 3.187, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4545960117581625e-05, |
|
"loss": 0.7245, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.406927782632875e-05, |
|
"loss": 0.7298, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.359259553507587e-05, |
|
"loss": 0.7172, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.3115913243822995e-05, |
|
"loss": 0.7183, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.263923095257011e-05, |
|
"loss": 0.7172, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2162548661317226e-05, |
|
"loss": 0.7166, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.168586637006435e-05, |
|
"loss": 0.7303, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.1209184078811464e-05, |
|
"loss": 0.716, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.073250178755859e-05, |
|
"loss": 0.7199, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.025581949630571e-05, |
|
"loss": 0.7227, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.7274474501609802, |
|
"eval_runtime": 19.9546, |
|
"eval_samples_per_second": 100.228, |
|
"eval_steps_per_second": 3.157, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.9779137205052834e-05, |
|
"loss": 0.7134, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.930245491379995e-05, |
|
"loss": 0.7354, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8825772622547065e-05, |
|
"loss": 0.7269, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.834909033129419e-05, |
|
"loss": 0.7261, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.787240804004131e-05, |
|
"loss": 0.735, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.739572574878843e-05, |
|
"loss": 0.716, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.691904345753555e-05, |
|
"loss": 0.721, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.644236116628267e-05, |
|
"loss": 0.7201, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.596567887502979e-05, |
|
"loss": 0.7231, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.548899658377691e-05, |
|
"loss": 0.7172, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 0.7270590662956238, |
|
"eval_runtime": 19.753, |
|
"eval_samples_per_second": 101.251, |
|
"eval_steps_per_second": 3.189, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.501231429252403e-05, |
|
"loss": 0.7296, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.453563200127115e-05, |
|
"loss": 0.7239, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.405894971001827e-05, |
|
"loss": 0.7215, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.358226741876539e-05, |
|
"loss": 0.7176, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.310558512751251e-05, |
|
"loss": 0.7277, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.262890283625963e-05, |
|
"loss": 0.7237, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.215222054500675e-05, |
|
"loss": 0.7167, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.167553825375387e-05, |
|
"loss": 0.7184, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.119885596250099e-05, |
|
"loss": 0.7238, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.072217367124811e-05, |
|
"loss": 0.7188, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 0.7263159155845642, |
|
"eval_runtime": 19.6317, |
|
"eval_samples_per_second": 101.876, |
|
"eval_steps_per_second": 3.209, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0245491379995232e-05, |
|
"loss": 0.7146, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9768809088742348e-05, |
|
"loss": 0.7307, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.929212679748947e-05, |
|
"loss": 0.721, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.881544450623659e-05, |
|
"loss": 0.7293, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.833876221498371e-05, |
|
"loss": 0.7245, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.7862079923730833e-05, |
|
"loss": 0.7264, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7385397632477952e-05, |
|
"loss": 0.722, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.6908715341225068e-05, |
|
"loss": 0.7195, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.643203304997219e-05, |
|
"loss": 0.7181, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5955350758719312e-05, |
|
"loss": 0.7225, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 0.7265506386756897, |
|
"eval_runtime": 19.5252, |
|
"eval_samples_per_second": 102.432, |
|
"eval_steps_per_second": 3.227, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.547866846746643e-05, |
|
"loss": 0.7151, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5001986176213553e-05, |
|
"loss": 0.7211, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4525303884960672e-05, |
|
"loss": 0.7231, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.404862159370779e-05, |
|
"loss": 0.7236, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.357193930245491e-05, |
|
"loss": 0.7161, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3095257011202032e-05, |
|
"loss": 0.7248, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.261857471994915e-05, |
|
"loss": 0.7195, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2141892428696274e-05, |
|
"loss": 0.718, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1665210137443392e-05, |
|
"loss": 0.7161, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.118852784619051e-05, |
|
"loss": 0.7204, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 0.7261104583740234, |
|
"eval_runtime": 20.0617, |
|
"eval_samples_per_second": 99.692, |
|
"eval_steps_per_second": 3.14, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.071184555493763e-05, |
|
"loss": 0.716, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0235163263684753e-05, |
|
"loss": 0.7211, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.975848097243187e-05, |
|
"loss": 0.7242, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9281798681178994e-05, |
|
"loss": 0.7129, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8828950504488756e-05, |
|
"loss": 0.7233, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8352268213235875e-05, |
|
"loss": 0.7286, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7875585921982997e-05, |
|
"loss": 0.7147, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7398903630730116e-05, |
|
"loss": 0.7303, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.692222133947724e-05, |
|
"loss": 0.7126, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6445539048224358e-05, |
|
"loss": 0.7174, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 0.7259587645530701, |
|
"eval_runtime": 20.6636, |
|
"eval_samples_per_second": 96.788, |
|
"eval_steps_per_second": 3.049, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5968856756971476e-05, |
|
"loss": 0.7147, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.54921744657186e-05, |
|
"loss": 0.7184, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5015492174465718e-05, |
|
"loss": 0.7218, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4538809883212837e-05, |
|
"loss": 0.7172, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4062127591959957e-05, |
|
"loss": 0.7326, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3585445300707078e-05, |
|
"loss": 0.726, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3108763009454197e-05, |
|
"loss": 0.711, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2632080718201317e-05, |
|
"loss": 0.7199, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.215539842694844e-05, |
|
"loss": 0.7256, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1678716135695557e-05, |
|
"loss": 0.7183, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 0.7255927324295044, |
|
"eval_runtime": 20.0566, |
|
"eval_samples_per_second": 99.718, |
|
"eval_steps_per_second": 3.141, |
|
"step": 12200 |
|
} |
|
], |
|
"max_steps": 12687, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.5858308142157791e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|