|
{ |
|
"best_metric": 0.6671983599662781, |
|
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-belle30b/checkpoint-13800", |
|
"epoch": 2.9773462783171523, |
|
"global_step": 13800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.6143, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.2447, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017699999999999997, |
|
"loss": 0.9529, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000237, |
|
"loss": 0.8899, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 0.8614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029958710612097066, |
|
"loss": 0.8402, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029915248098515027, |
|
"loss": 0.8335, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029871785584932993, |
|
"loss": 0.8303, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002982832307135096, |
|
"loss": 0.8261, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002978486055776892, |
|
"loss": 0.807, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.8271128535270691, |
|
"eval_runtime": 49.877, |
|
"eval_samples_per_second": 40.099, |
|
"eval_steps_per_second": 0.642, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029741398044186887, |
|
"loss": 0.808, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029697935530604853, |
|
"loss": 0.8092, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029654473017022814, |
|
"loss": 0.8045, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002961101050344078, |
|
"loss": 0.8007, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029567547989858747, |
|
"loss": 0.793, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002952408547627671, |
|
"loss": 0.7886, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029480622962694674, |
|
"loss": 0.7854, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002943716044911264, |
|
"loss": 0.783, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000293936979355306, |
|
"loss": 0.7797, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002935023542194857, |
|
"loss": 0.7801, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.793747067451477, |
|
"eval_runtime": 49.8962, |
|
"eval_samples_per_second": 40.083, |
|
"eval_steps_per_second": 0.641, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029306772908366534, |
|
"loss": 0.7879, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029263310394784495, |
|
"loss": 0.7745, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002921984788120246, |
|
"loss": 0.7725, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002917638536762043, |
|
"loss": 0.7659, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002913292285403839, |
|
"loss": 0.7658, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029089460340456355, |
|
"loss": 0.7722, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002904599782687432, |
|
"loss": 0.773, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002900253531329228, |
|
"loss": 0.7749, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002895907279971025, |
|
"loss": 0.7734, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028915610286128215, |
|
"loss": 0.7607, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 0.7771433591842651, |
|
"eval_runtime": 49.9486, |
|
"eval_samples_per_second": 40.041, |
|
"eval_steps_per_second": 0.641, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028872147772546176, |
|
"loss": 0.7657, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028828685258964137, |
|
"loss": 0.7602, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028785222745382103, |
|
"loss": 0.7619, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002874176023180007, |
|
"loss": 0.7587, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002869829771821803, |
|
"loss": 0.7553, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028654835204635997, |
|
"loss": 0.7565, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028611372691053963, |
|
"loss": 0.7586, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028567910177471924, |
|
"loss": 0.7556, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002852444766388989, |
|
"loss": 0.7487, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028480985150307857, |
|
"loss": 0.7516, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.7632888555526733, |
|
"eval_runtime": 49.913, |
|
"eval_samples_per_second": 40.07, |
|
"eval_steps_per_second": 0.641, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002843752263672582, |
|
"loss": 0.7527, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028394060123143784, |
|
"loss": 0.7407, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002835059760956175, |
|
"loss": 0.744, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002830713509597971, |
|
"loss": 0.7456, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002826367258239768, |
|
"loss": 0.7429, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028220210068815644, |
|
"loss": 0.7516, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028176747555233605, |
|
"loss": 0.7381, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002813328504165157, |
|
"loss": 0.7256, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002808982252806954, |
|
"loss": 0.7443, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000280463600144875, |
|
"loss": 0.7389, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.7532852292060852, |
|
"eval_runtime": 49.9829, |
|
"eval_samples_per_second": 40.014, |
|
"eval_steps_per_second": 0.64, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028002897500905465, |
|
"loss": 0.7374, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002795943498732343, |
|
"loss": 0.7296, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002791597247374139, |
|
"loss": 0.7424, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002787250996015936, |
|
"loss": 0.7328, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027829047446577325, |
|
"loss": 0.7367, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027785584932995286, |
|
"loss": 0.7419, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002774212241941325, |
|
"loss": 0.7347, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002769865990583122, |
|
"loss": 0.7292, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002765519739224918, |
|
"loss": 0.7394, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027611734878667146, |
|
"loss": 0.7358, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.7463639974594116, |
|
"eval_runtime": 49.9963, |
|
"eval_samples_per_second": 40.003, |
|
"eval_steps_per_second": 0.64, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002756827236508511, |
|
"loss": 0.7266, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00027524809851503073, |
|
"loss": 0.7336, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002748134733792104, |
|
"loss": 0.7296, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027437884824339006, |
|
"loss": 0.73, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027394422310756967, |
|
"loss": 0.7312, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027350959797174933, |
|
"loss": 0.7307, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000273074972835929, |
|
"loss": 0.7246, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002726403477001086, |
|
"loss": 0.7299, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027220572256428827, |
|
"loss": 0.7251, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027177109742846793, |
|
"loss": 0.7286, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.7393819093704224, |
|
"eval_runtime": 49.9896, |
|
"eval_samples_per_second": 40.008, |
|
"eval_steps_per_second": 0.64, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027133647229264754, |
|
"loss": 0.7186, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002709018471568272, |
|
"loss": 0.7215, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027046722202100687, |
|
"loss": 0.7295, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002700325968851865, |
|
"loss": 0.7198, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00026959797174936614, |
|
"loss": 0.7184, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002691633466135458, |
|
"loss": 0.7283, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002687287214777254, |
|
"loss": 0.7378, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002682940963419051, |
|
"loss": 0.7196, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026785947120608474, |
|
"loss": 0.7152, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026742484607026435, |
|
"loss": 0.7184, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.7342154383659363, |
|
"eval_runtime": 49.9957, |
|
"eval_samples_per_second": 40.003, |
|
"eval_steps_per_second": 0.64, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.000266990220934444, |
|
"loss": 0.7164, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002665555957986237, |
|
"loss": 0.7136, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002661209706628033, |
|
"loss": 0.7203, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026568634552698295, |
|
"loss": 0.7158, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002652517203911626, |
|
"loss": 0.7145, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002648170952553422, |
|
"loss": 0.7111, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002643824701195219, |
|
"loss": 0.7155, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026394784498370155, |
|
"loss": 0.718, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026351321984788116, |
|
"loss": 0.7125, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002630785947120608, |
|
"loss": 0.7163, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.7301950454711914, |
|
"eval_runtime": 49.9689, |
|
"eval_samples_per_second": 40.025, |
|
"eval_steps_per_second": 0.64, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002626439695762405, |
|
"loss": 0.7121, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002622093444404201, |
|
"loss": 0.7092, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026177471930459976, |
|
"loss": 0.7133, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002613400941687794, |
|
"loss": 0.7171, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026090546903295903, |
|
"loss": 0.7235, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002604708438971387, |
|
"loss": 0.7086, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00026003621876131836, |
|
"loss": 0.7136, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025960159362549797, |
|
"loss": 0.7031, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00025916696848967763, |
|
"loss": 0.7084, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002587323433538573, |
|
"loss": 0.7091, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.726446270942688, |
|
"eval_runtime": 50.0519, |
|
"eval_samples_per_second": 39.959, |
|
"eval_steps_per_second": 0.639, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002582977182180369, |
|
"loss": 0.7119, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025786309308221657, |
|
"loss": 0.7186, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025742846794639623, |
|
"loss": 0.703, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025699384281057584, |
|
"loss": 0.7078, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002565592176747555, |
|
"loss": 0.7084, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025612459253893517, |
|
"loss": 0.7014, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002556899674031148, |
|
"loss": 0.7076, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025525534226729444, |
|
"loss": 0.7103, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002548207171314741, |
|
"loss": 0.7118, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002543860919956537, |
|
"loss": 0.7028, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.7220268845558167, |
|
"eval_runtime": 49.9937, |
|
"eval_samples_per_second": 40.005, |
|
"eval_steps_per_second": 0.64, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002539514668598334, |
|
"loss": 0.707, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025351684172401304, |
|
"loss": 0.7045, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00025308221658819265, |
|
"loss": 0.6905, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002526475914523723, |
|
"loss": 0.6982, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000252212966316552, |
|
"loss": 0.706, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002517783411807316, |
|
"loss": 0.6992, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025134371604491125, |
|
"loss": 0.6939, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00025090909090909086, |
|
"loss": 0.7037, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002504744657732705, |
|
"loss": 0.7127, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00025003984063745014, |
|
"loss": 0.702, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.7191869020462036, |
|
"eval_runtime": 50.0038, |
|
"eval_samples_per_second": 39.997, |
|
"eval_steps_per_second": 0.64, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002496052155016298, |
|
"loss": 0.7033, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024917059036580946, |
|
"loss": 0.7028, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024873596522998907, |
|
"loss": 0.6967, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024830134009416874, |
|
"loss": 0.7068, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002478667149583484, |
|
"loss": 0.7105, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.000247432089822528, |
|
"loss": 0.6968, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024699746468670767, |
|
"loss": 0.7025, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024656283955088734, |
|
"loss": 0.6942, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024612821441506694, |
|
"loss": 0.6948, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002456935892792466, |
|
"loss": 0.6979, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.715853750705719, |
|
"eval_runtime": 50.0426, |
|
"eval_samples_per_second": 39.966, |
|
"eval_steps_per_second": 0.639, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024525896414342627, |
|
"loss": 0.6967, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002448243390076059, |
|
"loss": 0.7012, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024438971387178554, |
|
"loss": 0.697, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002439550887359652, |
|
"loss": 0.6931, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024352046360014485, |
|
"loss": 0.6856, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024308583846432448, |
|
"loss": 0.697, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024265121332850415, |
|
"loss": 0.6996, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024221658819268378, |
|
"loss": 0.698, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024178196305686342, |
|
"loss": 0.6952, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024134733792104308, |
|
"loss": 0.7049, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.7124837040901184, |
|
"eval_runtime": 50.0654, |
|
"eval_samples_per_second": 39.948, |
|
"eval_steps_per_second": 0.639, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024091271278522272, |
|
"loss": 0.6927, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024047808764940235, |
|
"loss": 0.6996, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00024004346251358202, |
|
"loss": 0.6921, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023960883737776165, |
|
"loss": 0.695, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002391742122419413, |
|
"loss": 0.6887, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023873958710612095, |
|
"loss": 0.6915, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002383049619703006, |
|
"loss": 0.6915, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023787033683448023, |
|
"loss": 0.6916, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002374357116986599, |
|
"loss": 0.687, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023700108656283953, |
|
"loss": 0.6997, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.7098860144615173, |
|
"eval_runtime": 50.0652, |
|
"eval_samples_per_second": 39.948, |
|
"eval_steps_per_second": 0.639, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023656646142701916, |
|
"loss": 0.6895, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023613183629119883, |
|
"loss": 0.6861, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023569721115537846, |
|
"loss": 0.6988, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002352625860195581, |
|
"loss": 0.6852, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023482796088373776, |
|
"loss": 0.6863, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002343933357479174, |
|
"loss": 0.6943, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023395871061209704, |
|
"loss": 0.686, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002335240854762767, |
|
"loss": 0.684, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023308946034045634, |
|
"loss": 0.6866, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023265483520463597, |
|
"loss": 0.6859, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.7077216506004333, |
|
"eval_runtime": 50.0526, |
|
"eval_samples_per_second": 39.958, |
|
"eval_steps_per_second": 0.639, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023222021006881564, |
|
"loss": 0.6845, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023178558493299527, |
|
"loss": 0.7011, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002313509597971749, |
|
"loss": 0.69, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023091633466135457, |
|
"loss": 0.6931, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002304817095255342, |
|
"loss": 0.6998, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00023004708438971385, |
|
"loss": 0.6933, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002296124592538935, |
|
"loss": 0.6859, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022917783411807315, |
|
"loss": 0.6972, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022874320898225278, |
|
"loss": 0.6868, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022830858384643245, |
|
"loss": 0.6902, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.7059928178787231, |
|
"eval_runtime": 50.0118, |
|
"eval_samples_per_second": 39.991, |
|
"eval_steps_per_second": 0.64, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022787395871061208, |
|
"loss": 0.6819, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022743933357479172, |
|
"loss": 0.6833, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022700470843897138, |
|
"loss": 0.6826, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022657008330315102, |
|
"loss": 0.694, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022613545816733066, |
|
"loss": 0.6827, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022570083303151032, |
|
"loss": 0.6844, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022526620789568996, |
|
"loss": 0.6893, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002248315827598696, |
|
"loss": 0.6843, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022439695762404926, |
|
"loss": 0.6843, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002239623324882289, |
|
"loss": 0.691, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.7041522264480591, |
|
"eval_runtime": 50.0554, |
|
"eval_samples_per_second": 39.956, |
|
"eval_steps_per_second": 0.639, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022352770735240853, |
|
"loss": 0.6846, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002230930822165882, |
|
"loss": 0.689, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022265845708076783, |
|
"loss": 0.6777, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022222383194494747, |
|
"loss": 0.6903, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022178920680912713, |
|
"loss": 0.684, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022135458167330677, |
|
"loss": 0.6867, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002209199565374864, |
|
"loss": 0.6697, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022048533140166607, |
|
"loss": 0.6864, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002200507062658457, |
|
"loss": 0.6813, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021961608113002534, |
|
"loss": 0.6807, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 0.7024796009063721, |
|
"eval_runtime": 50.022, |
|
"eval_samples_per_second": 39.982, |
|
"eval_steps_per_second": 0.64, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.000219181455994205, |
|
"loss": 0.6824, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021874683085838464, |
|
"loss": 0.6814, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021831220572256427, |
|
"loss": 0.6789, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021787758058674394, |
|
"loss": 0.6752, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021744295545092358, |
|
"loss": 0.6826, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002170083303151032, |
|
"loss": 0.6874, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021657370517928288, |
|
"loss": 0.6761, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002161390800434625, |
|
"loss": 0.6795, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021570445490764215, |
|
"loss": 0.6781, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002152698297718218, |
|
"loss": 0.6754, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.7004331350326538, |
|
"eval_runtime": 50.0568, |
|
"eval_samples_per_second": 39.955, |
|
"eval_steps_per_second": 0.639, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021483520463600145, |
|
"loss": 0.6791, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021440057950018108, |
|
"loss": 0.6863, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021396595436436075, |
|
"loss": 0.6846, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021353132922854036, |
|
"loss": 0.6814, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021309670409272, |
|
"loss": 0.6825, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021266207895689963, |
|
"loss": 0.6827, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002122274538210793, |
|
"loss": 0.6769, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021179282868525893, |
|
"loss": 0.6869, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021135820354943857, |
|
"loss": 0.6815, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021092357841361823, |
|
"loss": 0.6725, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.6981337666511536, |
|
"eval_runtime": 50.0559, |
|
"eval_samples_per_second": 39.955, |
|
"eval_steps_per_second": 0.639, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021051068453458889, |
|
"loss": 0.6731, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021007605939876855, |
|
"loss": 0.6792, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020964143426294819, |
|
"loss": 0.6755, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020920680912712782, |
|
"loss": 0.6833, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002087721839913075, |
|
"loss": 0.6693, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020833755885548712, |
|
"loss": 0.6728, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020790293371966676, |
|
"loss": 0.6812, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020746830858384642, |
|
"loss": 0.6734, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020703368344802606, |
|
"loss": 0.6813, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002065990583122057, |
|
"loss": 0.6779, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.6968498826026917, |
|
"eval_runtime": 50.0697, |
|
"eval_samples_per_second": 39.944, |
|
"eval_steps_per_second": 0.639, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020616443317638536, |
|
"loss": 0.6712, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000205729808040565, |
|
"loss": 0.6846, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020529518290474463, |
|
"loss": 0.6694, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002048605577689243, |
|
"loss": 0.6753, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020442593263310393, |
|
"loss": 0.6792, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020399130749728357, |
|
"loss": 0.6738, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020355668236146323, |
|
"loss": 0.6699, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020312205722564287, |
|
"loss": 0.6737, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002026874320898225, |
|
"loss": 0.6837, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020225280695400217, |
|
"loss": 0.6701, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.6954157948493958, |
|
"eval_runtime": 50.0724, |
|
"eval_samples_per_second": 39.942, |
|
"eval_steps_per_second": 0.639, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002018181818181818, |
|
"loss": 0.6677, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020138355668236144, |
|
"loss": 0.6706, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002009489315465411, |
|
"loss": 0.6741, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020051430641072074, |
|
"loss": 0.6757, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020007968127490038, |
|
"loss": 0.6773, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019964505613908004, |
|
"loss": 0.6728, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019921043100325968, |
|
"loss": 0.6715, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019877580586743931, |
|
"loss": 0.6679, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019834118073161898, |
|
"loss": 0.6729, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019790655559579861, |
|
"loss": 0.6749, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.6941403746604919, |
|
"eval_runtime": 50.0645, |
|
"eval_samples_per_second": 39.948, |
|
"eval_steps_per_second": 0.639, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019747193045997825, |
|
"loss": 0.6661, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001970373053241579, |
|
"loss": 0.6638, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019660268018833755, |
|
"loss": 0.6715, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001961680550525172, |
|
"loss": 0.6721, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019573342991669682, |
|
"loss": 0.6695, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001952988047808765, |
|
"loss": 0.6809, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019486417964505612, |
|
"loss": 0.6701, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019442955450923576, |
|
"loss": 0.6747, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019399492937341542, |
|
"loss": 0.6713, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019356030423759506, |
|
"loss": 0.6746, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.6935788989067078, |
|
"eval_runtime": 50.0137, |
|
"eval_samples_per_second": 39.989, |
|
"eval_steps_per_second": 0.64, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001931256791017747, |
|
"loss": 0.672, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019269105396595436, |
|
"loss": 0.6673, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000192256428830134, |
|
"loss": 0.6706, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019182180369431363, |
|
"loss": 0.6677, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001913871785584933, |
|
"loss": 0.67, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019095255342267293, |
|
"loss": 0.6693, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019051792828685257, |
|
"loss": 0.671, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019008330315103223, |
|
"loss": 0.6748, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018964867801521187, |
|
"loss": 0.6698, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001892140528793915, |
|
"loss": 0.662, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.6918168663978577, |
|
"eval_runtime": 50.0897, |
|
"eval_samples_per_second": 39.928, |
|
"eval_steps_per_second": 0.639, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018877942774357117, |
|
"loss": 0.66, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001883448026077508, |
|
"loss": 0.6705, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018791017747193044, |
|
"loss": 0.6693, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001874755523361101, |
|
"loss": 0.6546, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018704092720028974, |
|
"loss": 0.6673, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018660630206446938, |
|
"loss": 0.671, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018617167692864904, |
|
"loss": 0.675, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018573705179282868, |
|
"loss": 0.6744, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018530242665700832, |
|
"loss": 0.6643, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018486780152118798, |
|
"loss": 0.6686, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.6908227801322937, |
|
"eval_runtime": 50.0742, |
|
"eval_samples_per_second": 39.941, |
|
"eval_steps_per_second": 0.639, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018443317638536762, |
|
"loss": 0.6666, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018399855124954725, |
|
"loss": 0.6658, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001835639261137269, |
|
"loss": 0.671, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018312930097790653, |
|
"loss": 0.6736, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018269467584208616, |
|
"loss": 0.6697, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018226005070626583, |
|
"loss": 0.6718, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018182542557044546, |
|
"loss": 0.6701, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001813908004346251, |
|
"loss": 0.6696, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018095617529880476, |
|
"loss": 0.6611, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001805215501629844, |
|
"loss": 0.6638, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 0.689289927482605, |
|
"eval_runtime": 50.1304, |
|
"eval_samples_per_second": 39.896, |
|
"eval_steps_per_second": 0.638, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018008692502716404, |
|
"loss": 0.6646, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001796522998913437, |
|
"loss": 0.6717, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017921767475552334, |
|
"loss": 0.6647, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017878304961970297, |
|
"loss": 0.672, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017834842448388264, |
|
"loss": 0.6645, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017791379934806227, |
|
"loss": 0.6768, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001774791742122419, |
|
"loss": 0.6748, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017704454907642157, |
|
"loss": 0.6722, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001766099239406012, |
|
"loss": 0.6631, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017617529880478084, |
|
"loss": 0.6647, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.688850462436676, |
|
"eval_runtime": 50.0542, |
|
"eval_samples_per_second": 39.957, |
|
"eval_steps_per_second": 0.639, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001757406736689605, |
|
"loss": 0.66, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017530604853314014, |
|
"loss": 0.6682, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017487142339731978, |
|
"loss": 0.6589, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017443679826149944, |
|
"loss": 0.6691, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017400217312567908, |
|
"loss": 0.6726, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017356754798985872, |
|
"loss": 0.6628, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017313292285403838, |
|
"loss": 0.6719, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017269829771821802, |
|
"loss": 0.6648, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017226367258239765, |
|
"loss": 0.6594, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017182904744657732, |
|
"loss": 0.6717, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.6876093745231628, |
|
"eval_runtime": 50.1763, |
|
"eval_samples_per_second": 39.859, |
|
"eval_steps_per_second": 0.638, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017139442231075695, |
|
"loss": 0.6632, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001709597971749366, |
|
"loss": 0.6619, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017052517203911625, |
|
"loss": 0.667, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001700905469032959, |
|
"loss": 0.6625, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016965592176747553, |
|
"loss": 0.6661, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001692212966316552, |
|
"loss": 0.656, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016878667149583483, |
|
"loss": 0.6668, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016835204636001446, |
|
"loss": 0.6669, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016791742122419413, |
|
"loss": 0.6662, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016748279608837376, |
|
"loss": 0.6692, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 0.6869744658470154, |
|
"eval_runtime": 50.1517, |
|
"eval_samples_per_second": 39.879, |
|
"eval_steps_per_second": 0.638, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0001670481709525534, |
|
"loss": 0.6571, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016661354581673306, |
|
"loss": 0.6659, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001661789206809127, |
|
"loss": 0.6622, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016574429554509234, |
|
"loss": 0.6522, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.000165309670409272, |
|
"loss": 0.667, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016487504527345164, |
|
"loss": 0.6644, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016444042013763127, |
|
"loss": 0.6625, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016400579500181094, |
|
"loss": 0.6686, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016357116986599057, |
|
"loss": 0.6562, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001631365447301702, |
|
"loss": 0.6595, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.685205340385437, |
|
"eval_runtime": 50.162, |
|
"eval_samples_per_second": 39.871, |
|
"eval_steps_per_second": 0.638, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016270191959434987, |
|
"loss": 0.6595, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001622672944585295, |
|
"loss": 0.6644, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016183266932270915, |
|
"loss": 0.6647, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001613980441868888, |
|
"loss": 0.6655, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016096341905106845, |
|
"loss": 0.6564, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016052879391524808, |
|
"loss": 0.6578, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016009416877942775, |
|
"loss": 0.6624, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015965954364360738, |
|
"loss": 0.6633, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015922491850778702, |
|
"loss": 0.6616, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015879029337196668, |
|
"loss": 0.6607, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 0.6847727298736572, |
|
"eval_runtime": 50.1562, |
|
"eval_samples_per_second": 39.875, |
|
"eval_steps_per_second": 0.638, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015835566823614632, |
|
"loss": 0.6564, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015792104310032596, |
|
"loss": 0.66, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015748641796450562, |
|
"loss": 0.6589, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015705179282868526, |
|
"loss": 0.6596, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001566171676928649, |
|
"loss": 0.6663, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015618254255704456, |
|
"loss": 0.6603, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001557479174212242, |
|
"loss": 0.6674, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015531329228540383, |
|
"loss": 0.6603, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001548786671495835, |
|
"loss": 0.6612, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015444404201376313, |
|
"loss": 0.6609, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.683903694152832, |
|
"eval_runtime": 50.079, |
|
"eval_samples_per_second": 39.937, |
|
"eval_steps_per_second": 0.639, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015400941687794277, |
|
"loss": 0.6557, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015357479174212243, |
|
"loss": 0.6627, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015314016660630207, |
|
"loss": 0.6667, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001527055414704817, |
|
"loss": 0.6633, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015227091633466137, |
|
"loss": 0.6565, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.000151836291198841, |
|
"loss": 0.6588, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015140166606302064, |
|
"loss": 0.6687, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001509670409272003, |
|
"loss": 0.6611, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015053241579137994, |
|
"loss": 0.6576, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015009779065555957, |
|
"loss": 0.6576, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.6830142736434937, |
|
"eval_runtime": 50.1233, |
|
"eval_samples_per_second": 39.902, |
|
"eval_steps_per_second": 0.638, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001496631655197392, |
|
"loss": 0.6617, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014922854038391885, |
|
"loss": 0.6533, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001487939152480985, |
|
"loss": 0.6524, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014835929011227815, |
|
"loss": 0.6597, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014792466497645778, |
|
"loss": 0.656, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014749003984063745, |
|
"loss": 0.6501, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014705541470481708, |
|
"loss": 0.6563, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014662078956899672, |
|
"loss": 0.6496, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014618616443317638, |
|
"loss": 0.6602, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014575153929735602, |
|
"loss": 0.6617, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.6818540096282959, |
|
"eval_runtime": 50.1175, |
|
"eval_samples_per_second": 39.906, |
|
"eval_steps_per_second": 0.639, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014531691416153566, |
|
"loss": 0.6655, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014488228902571532, |
|
"loss": 0.6544, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014444766388989496, |
|
"loss": 0.655, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001440130387540746, |
|
"loss": 0.6535, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014357841361825426, |
|
"loss": 0.6584, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001431437884824339, |
|
"loss": 0.6602, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014270916334661353, |
|
"loss": 0.6689, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001422745382107932, |
|
"loss": 0.6613, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014183991307497283, |
|
"loss": 0.659, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014140528793915247, |
|
"loss": 0.6463, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.681868851184845, |
|
"eval_runtime": 50.1388, |
|
"eval_samples_per_second": 39.889, |
|
"eval_steps_per_second": 0.638, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014097066280333213, |
|
"loss": 0.6617, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00014053603766751177, |
|
"loss": 0.6648, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001401014125316914, |
|
"loss": 0.6528, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013966678739587107, |
|
"loss": 0.6655, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001392321622600507, |
|
"loss": 0.6609, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013879753712423034, |
|
"loss": 0.6528, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013836291198841, |
|
"loss": 0.6561, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013792828685258964, |
|
"loss": 0.6682, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013749366171676928, |
|
"loss": 0.6677, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013705903658094894, |
|
"loss": 0.6599, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.6807426810264587, |
|
"eval_runtime": 50.3308, |
|
"eval_samples_per_second": 39.737, |
|
"eval_steps_per_second": 0.636, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013662441144512855, |
|
"loss": 0.6525, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001361897863093082, |
|
"loss": 0.6574, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013575516117348785, |
|
"loss": 0.6516, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013532053603766749, |
|
"loss": 0.6533, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013488591090184715, |
|
"loss": 0.6577, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013445128576602679, |
|
"loss": 0.6592, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013401666063020642, |
|
"loss": 0.6585, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013358203549438609, |
|
"loss": 0.6607, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013314741035856572, |
|
"loss": 0.6617, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013271278522274536, |
|
"loss": 0.6443, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.6800745725631714, |
|
"eval_runtime": 50.165, |
|
"eval_samples_per_second": 39.868, |
|
"eval_steps_per_second": 0.638, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013227816008692502, |
|
"loss": 0.6587, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013184353495110466, |
|
"loss": 0.6613, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001314089098152843, |
|
"loss": 0.654, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013097428467946396, |
|
"loss": 0.6523, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001305396595436436, |
|
"loss": 0.6563, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013010503440782323, |
|
"loss": 0.6524, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001296704092720029, |
|
"loss": 0.6523, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012923578413618253, |
|
"loss": 0.6493, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012880115900036217, |
|
"loss": 0.6538, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012836653386454183, |
|
"loss": 0.6512, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.6790341734886169, |
|
"eval_runtime": 50.1317, |
|
"eval_samples_per_second": 39.895, |
|
"eval_steps_per_second": 0.638, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012793190872872147, |
|
"loss": 0.6562, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001274972835929011, |
|
"loss": 0.6556, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012706265845708077, |
|
"loss": 0.65, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001266280333212604, |
|
"loss": 0.661, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012619340818544004, |
|
"loss": 0.655, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001257587830496197, |
|
"loss": 0.6534, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012532415791379934, |
|
"loss": 0.6517, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012488953277797898, |
|
"loss": 0.6605, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012445490764215864, |
|
"loss": 0.6556, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012402028250633828, |
|
"loss": 0.6492, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.6781870126724243, |
|
"eval_runtime": 50.0809, |
|
"eval_samples_per_second": 39.935, |
|
"eval_steps_per_second": 0.639, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012358565737051791, |
|
"loss": 0.6541, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012315103223469758, |
|
"loss": 0.6517, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012271640709887721, |
|
"loss": 0.6483, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012228178196305685, |
|
"loss": 0.6619, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001218471568272365, |
|
"loss": 0.6556, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012141253169141615, |
|
"loss": 0.6471, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012097790655559579, |
|
"loss": 0.6611, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012054328141977544, |
|
"loss": 0.6506, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012010865628395509, |
|
"loss": 0.6611, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011967403114813472, |
|
"loss": 0.6557, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.6776989102363586, |
|
"eval_runtime": 50.1344, |
|
"eval_samples_per_second": 39.893, |
|
"eval_steps_per_second": 0.638, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011923940601231437, |
|
"loss": 0.6504, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011880478087649402, |
|
"loss": 0.6552, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011839188699746468, |
|
"loss": 0.641, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011795726186164432, |
|
"loss": 0.6535, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011752263672582397, |
|
"loss": 0.6568, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011708801159000362, |
|
"loss": 0.6621, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011665338645418325, |
|
"loss": 0.6607, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001162187613183629, |
|
"loss": 0.6516, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011578413618254255, |
|
"loss": 0.6497, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011534951104672219, |
|
"loss": 0.6559, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.6773191094398499, |
|
"eval_runtime": 50.1605, |
|
"eval_samples_per_second": 39.872, |
|
"eval_steps_per_second": 0.638, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011491488591090184, |
|
"loss": 0.6595, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011448026077508149, |
|
"loss": 0.6495, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011404563563926113, |
|
"loss": 0.6518, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011361101050344078, |
|
"loss": 0.6511, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011317638536762043, |
|
"loss": 0.6495, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011274176023180006, |
|
"loss": 0.6485, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011230713509597971, |
|
"loss": 0.6543, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011187250996015936, |
|
"loss": 0.6509, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.000111437884824339, |
|
"loss": 0.656, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011100325968851865, |
|
"loss": 0.6557, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.6773696541786194, |
|
"eval_runtime": 50.1296, |
|
"eval_samples_per_second": 39.897, |
|
"eval_steps_per_second": 0.638, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001105686345526983, |
|
"loss": 0.6509, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00011013400941687794, |
|
"loss": 0.65, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010969938428105759, |
|
"loss": 0.6447, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010926475914523724, |
|
"loss": 0.6563, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010883013400941687, |
|
"loss": 0.6545, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010839550887359652, |
|
"loss": 0.6509, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010796088373777617, |
|
"loss": 0.6434, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010752625860195581, |
|
"loss": 0.6412, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010709163346613546, |
|
"loss": 0.6512, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010665700833031508, |
|
"loss": 0.6478, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.6760911345481873, |
|
"eval_runtime": 50.1795, |
|
"eval_samples_per_second": 39.857, |
|
"eval_steps_per_second": 0.638, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010622238319449473, |
|
"loss": 0.6545, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010578775805867438, |
|
"loss": 0.6468, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010535313292285402, |
|
"loss": 0.6527, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010491850778703367, |
|
"loss": 0.6621, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010448388265121332, |
|
"loss": 0.6496, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010404925751539295, |
|
"loss": 0.6512, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001036146323795726, |
|
"loss": 0.6491, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010318000724375225, |
|
"loss": 0.6482, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010274538210793189, |
|
"loss": 0.6456, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010231075697211154, |
|
"loss": 0.6458, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.6748936772346497, |
|
"eval_runtime": 50.1856, |
|
"eval_samples_per_second": 39.852, |
|
"eval_steps_per_second": 0.638, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010187613183629119, |
|
"loss": 0.6473, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010144150670047083, |
|
"loss": 0.6496, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010100688156465048, |
|
"loss": 0.6566, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010057225642883013, |
|
"loss": 0.6475, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00010013763129300976, |
|
"loss": 0.6536, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.970300615718941e-05, |
|
"loss": 0.646, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.926838102136906e-05, |
|
"loss": 0.6503, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.88337558855487e-05, |
|
"loss": 0.6527, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.839913074972835e-05, |
|
"loss": 0.6514, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.7964505613908e-05, |
|
"loss": 0.6548, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.6744834780693054, |
|
"eval_runtime": 50.1696, |
|
"eval_samples_per_second": 39.865, |
|
"eval_steps_per_second": 0.638, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.752988047808764e-05, |
|
"loss": 0.6483, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.709525534226729e-05, |
|
"loss": 0.6522, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.666063020644694e-05, |
|
"loss": 0.6538, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.622600507062657e-05, |
|
"loss": 0.6449, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.579137993480622e-05, |
|
"loss": 0.6451, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.535675479898587e-05, |
|
"loss": 0.6355, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.492212966316551e-05, |
|
"loss": 0.6494, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.448750452734516e-05, |
|
"loss": 0.6435, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.405287939152481e-05, |
|
"loss": 0.651, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.361825425570445e-05, |
|
"loss": 0.6493, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.674017071723938, |
|
"eval_runtime": 50.1402, |
|
"eval_samples_per_second": 39.888, |
|
"eval_steps_per_second": 0.638, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.31836291198841e-05, |
|
"loss": 0.6469, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.274900398406375e-05, |
|
"loss": 0.65, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.231437884824338e-05, |
|
"loss": 0.6536, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.187975371242303e-05, |
|
"loss": 0.6488, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.144512857660268e-05, |
|
"loss": 0.6391, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.101050344078232e-05, |
|
"loss": 0.644, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.057587830496197e-05, |
|
"loss": 0.6507, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.014125316914162e-05, |
|
"loss": 0.6404, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.970662803332126e-05, |
|
"loss": 0.6509, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.92720028975009e-05, |
|
"loss": 0.6435, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 0.6735255122184753, |
|
"eval_runtime": 50.1703, |
|
"eval_samples_per_second": 39.864, |
|
"eval_steps_per_second": 0.638, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.883737776168056e-05, |
|
"loss": 0.6374, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.840275262586019e-05, |
|
"loss": 0.6445, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.796812749003983e-05, |
|
"loss": 0.6495, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.753350235421946e-05, |
|
"loss": 0.6482, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.709887721839911e-05, |
|
"loss": 0.6441, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.666425208257877e-05, |
|
"loss": 0.6525, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.62296269467584e-05, |
|
"loss": 0.6453, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.579500181093805e-05, |
|
"loss": 0.6498, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.53603766751177e-05, |
|
"loss": 0.6471, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.492575153929734e-05, |
|
"loss": 0.6419, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.6730753779411316, |
|
"eval_runtime": 50.1885, |
|
"eval_samples_per_second": 39.85, |
|
"eval_steps_per_second": 0.638, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.449112640347699e-05, |
|
"loss": 0.6447, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.405650126765664e-05, |
|
"loss": 0.6444, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.362187613183627e-05, |
|
"loss": 0.6393, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.318725099601592e-05, |
|
"loss": 0.6464, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.275262586019557e-05, |
|
"loss": 0.6458, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.231800072437521e-05, |
|
"loss": 0.6402, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.188337558855486e-05, |
|
"loss": 0.6409, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.144875045273451e-05, |
|
"loss": 0.6512, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.101412531691415e-05, |
|
"loss": 0.6498, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.05795001810938e-05, |
|
"loss": 0.6393, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.6726437211036682, |
|
"eval_runtime": 50.1492, |
|
"eval_samples_per_second": 39.881, |
|
"eval_steps_per_second": 0.638, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.014487504527345e-05, |
|
"loss": 0.6458, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.971024990945308e-05, |
|
"loss": 0.6466, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.927562477363273e-05, |
|
"loss": 0.644, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.884099963781238e-05, |
|
"loss": 0.6467, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.840637450199202e-05, |
|
"loss": 0.6436, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.797174936617167e-05, |
|
"loss": 0.6422, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.753712423035132e-05, |
|
"loss": 0.645, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.710249909453096e-05, |
|
"loss": 0.6423, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.666787395871061e-05, |
|
"loss": 0.6557, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.623324882289026e-05, |
|
"loss": 0.646, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.6725419759750366, |
|
"eval_runtime": 50.1975, |
|
"eval_samples_per_second": 39.843, |
|
"eval_steps_per_second": 0.637, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.57986236870699e-05, |
|
"loss": 0.6503, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.536399855124954e-05, |
|
"loss": 0.6428, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.49293734154292e-05, |
|
"loss": 0.6438, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.449474827960883e-05, |
|
"loss": 0.6427, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.406012314378847e-05, |
|
"loss": 0.6458, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.362549800796812e-05, |
|
"loss": 0.6423, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.319087287214777e-05, |
|
"loss": 0.6466, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.27562477363274e-05, |
|
"loss": 0.6394, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.232162260050705e-05, |
|
"loss": 0.6362, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.18869974646867e-05, |
|
"loss": 0.6399, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.6719211935997009, |
|
"eval_runtime": 50.1808, |
|
"eval_samples_per_second": 39.856, |
|
"eval_steps_per_second": 0.638, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.145237232886634e-05, |
|
"loss": 0.6378, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.101774719304599e-05, |
|
"loss": 0.634, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.058312205722564e-05, |
|
"loss": 0.6374, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.014849692140528e-05, |
|
"loss": 0.6464, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.971387178558493e-05, |
|
"loss": 0.643, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.927924664976458e-05, |
|
"loss": 0.6384, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.884462151394421e-05, |
|
"loss": 0.6451, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.840999637812386e-05, |
|
"loss": 0.6465, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.799710249909452e-05, |
|
"loss": 0.646, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.756247736327417e-05, |
|
"loss": 0.6525, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.6714358925819397, |
|
"eval_runtime": 50.1294, |
|
"eval_samples_per_second": 39.897, |
|
"eval_steps_per_second": 0.638, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.712785222745382e-05, |
|
"loss": 0.6423, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.669322709163345e-05, |
|
"loss": 0.6449, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.62586019558131e-05, |
|
"loss": 0.6325, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.582397681999275e-05, |
|
"loss": 0.6558, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.538935168417239e-05, |
|
"loss": 0.6419, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.495472654835204e-05, |
|
"loss": 0.6466, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.452010141253169e-05, |
|
"loss": 0.6357, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.408547627671133e-05, |
|
"loss": 0.6366, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.365085114089098e-05, |
|
"loss": 0.6466, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.321622600507063e-05, |
|
"loss": 0.6542, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.6710445880889893, |
|
"eval_runtime": 50.2479, |
|
"eval_samples_per_second": 39.803, |
|
"eval_steps_per_second": 0.637, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.278160086925026e-05, |
|
"loss": 0.6481, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.23469757334299e-05, |
|
"loss": 0.6425, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.191235059760955e-05, |
|
"loss": 0.6439, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.14777254617892e-05, |
|
"loss": 0.6424, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.104310032596884e-05, |
|
"loss": 0.6404, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.060847519014849e-05, |
|
"loss": 0.6387, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.017385005432814e-05, |
|
"loss": 0.6462, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.973922491850778e-05, |
|
"loss": 0.6431, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9304599782687424e-05, |
|
"loss": 0.638, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8869974646867074e-05, |
|
"loss": 0.6344, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.6704220771789551, |
|
"eval_runtime": 50.1558, |
|
"eval_samples_per_second": 39.876, |
|
"eval_steps_per_second": 0.638, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.843534951104672e-05, |
|
"loss": 0.6448, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.800072437522636e-05, |
|
"loss": 0.6449, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.756609923940601e-05, |
|
"loss": 0.6399, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.7131474103585654e-05, |
|
"loss": 0.638, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.66968489677653e-05, |
|
"loss": 0.6418, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.626222383194495e-05, |
|
"loss": 0.6482, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.582759869612459e-05, |
|
"loss": 0.6392, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.5392973560304233e-05, |
|
"loss": 0.6363, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.4958348424483883e-05, |
|
"loss": 0.6503, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.452372328866353e-05, |
|
"loss": 0.6453, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 0.670009195804596, |
|
"eval_runtime": 50.155, |
|
"eval_samples_per_second": 39.876, |
|
"eval_steps_per_second": 0.638, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.408909815284317e-05, |
|
"loss": 0.6384, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.365447301702282e-05, |
|
"loss": 0.6449, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3219847881202456e-05, |
|
"loss": 0.6406, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.27852227453821e-05, |
|
"loss": 0.6363, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.235059760956174e-05, |
|
"loss": 0.6482, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.191597247374139e-05, |
|
"loss": 0.6503, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1481347337921036e-05, |
|
"loss": 0.6479, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.10684534588917e-05, |
|
"loss": 0.6437, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.063382832307134e-05, |
|
"loss": 0.6398, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0199203187250985e-05, |
|
"loss": 0.6456, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.6702134013175964, |
|
"eval_runtime": 50.1834, |
|
"eval_samples_per_second": 39.854, |
|
"eval_steps_per_second": 0.638, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.9764578051430635e-05, |
|
"loss": 0.646, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.932995291561028e-05, |
|
"loss": 0.6375, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.889532777978992e-05, |
|
"loss": 0.6393, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.846070264396957e-05, |
|
"loss": 0.638, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.8026077508149215e-05, |
|
"loss": 0.6411, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.759145237232886e-05, |
|
"loss": 0.6467, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.715682723650851e-05, |
|
"loss": 0.6369, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.672220210068815e-05, |
|
"loss": 0.637, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6287576964867795e-05, |
|
"loss": 0.6486, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.5852951829047445e-05, |
|
"loss": 0.637, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 0.6698750257492065, |
|
"eval_runtime": 50.1539, |
|
"eval_samples_per_second": 39.877, |
|
"eval_steps_per_second": 0.638, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.541832669322709e-05, |
|
"loss": 0.639, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.498370155740673e-05, |
|
"loss": 0.6366, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.454907642158638e-05, |
|
"loss": 0.6409, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4114451285766025e-05, |
|
"loss": 0.6394, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.367982614994567e-05, |
|
"loss": 0.6351, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.324520101412532e-05, |
|
"loss": 0.6391, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.281057587830496e-05, |
|
"loss": 0.6267, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2375950742484604e-05, |
|
"loss": 0.6461, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.194132560666425e-05, |
|
"loss": 0.6483, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.150670047084389e-05, |
|
"loss": 0.6461, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 0.6692882180213928, |
|
"eval_runtime": 50.1673, |
|
"eval_samples_per_second": 39.867, |
|
"eval_steps_per_second": 0.638, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.1072075335023534e-05, |
|
"loss": 0.6429, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.0637450199203184e-05, |
|
"loss": 0.6416, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.020282506338283e-05, |
|
"loss": 0.6356, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.976819992756247e-05, |
|
"loss": 0.6402, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.933357479174212e-05, |
|
"loss": 0.6395, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.8898949655921764e-05, |
|
"loss": 0.6432, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.846432452010141e-05, |
|
"loss": 0.6386, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.802969938428106e-05, |
|
"loss": 0.6396, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.75950742484607e-05, |
|
"loss": 0.6423, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7160449112640344e-05, |
|
"loss": 0.649, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.6691960096359253, |
|
"eval_runtime": 50.1649, |
|
"eval_samples_per_second": 39.869, |
|
"eval_steps_per_second": 0.638, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.672582397681999e-05, |
|
"loss": 0.6547, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.629119884099964e-05, |
|
"loss": 0.642, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.585657370517928e-05, |
|
"loss": 0.634, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.542194856935892e-05, |
|
"loss": 0.6447, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.498732343353857e-05, |
|
"loss": 0.6285, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.455269829771822e-05, |
|
"loss": 0.6436, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.411807316189786e-05, |
|
"loss": 0.6349, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.36834480260775e-05, |
|
"loss": 0.6425, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.324882289025715e-05, |
|
"loss": 0.6393, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2814197754436796e-05, |
|
"loss": 0.6367, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.6687243580818176, |
|
"eval_runtime": 50.3508, |
|
"eval_samples_per_second": 39.721, |
|
"eval_steps_per_second": 0.636, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.237957261861644e-05, |
|
"loss": 0.6386, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.194494748279609e-05, |
|
"loss": 0.6526, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.151032234697573e-05, |
|
"loss": 0.6357, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1075697211155376e-05, |
|
"loss": 0.6353, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0641072075335026e-05, |
|
"loss": 0.6449, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0206446939514663e-05, |
|
"loss": 0.6425, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.977182180369431e-05, |
|
"loss": 0.6374, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9337196667873956e-05, |
|
"loss": 0.6324, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.89025715320536e-05, |
|
"loss": 0.6502, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8467946396233246e-05, |
|
"loss": 0.637, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.6683821082115173, |
|
"eval_runtime": 50.2054, |
|
"eval_samples_per_second": 39.836, |
|
"eval_steps_per_second": 0.637, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8033321260412892e-05, |
|
"loss": 0.647, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7598696124592536e-05, |
|
"loss": 0.632, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7164070988772182e-05, |
|
"loss": 0.6411, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.672944585295183e-05, |
|
"loss": 0.632, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6294820717131472e-05, |
|
"loss": 0.6389, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.586019558131112e-05, |
|
"loss": 0.6337, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.542557044549076e-05, |
|
"loss": 0.6439, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.4990945309670405e-05, |
|
"loss": 0.6364, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4556320173850052e-05, |
|
"loss": 0.6402, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4121695038029695e-05, |
|
"loss": 0.6376, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.6680713295936584, |
|
"eval_runtime": 50.1757, |
|
"eval_samples_per_second": 39.86, |
|
"eval_steps_per_second": 0.638, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3687069902209342e-05, |
|
"loss": 0.6316, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.325244476638899e-05, |
|
"loss": 0.6393, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.281781963056863e-05, |
|
"loss": 0.6372, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2383194494748278e-05, |
|
"loss": 0.6466, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1948569358927925e-05, |
|
"loss": 0.6392, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1513944223107568e-05, |
|
"loss": 0.6389, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.107931908728721e-05, |
|
"loss": 0.64, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0644693951466858e-05, |
|
"loss": 0.6362, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.02100688156465e-05, |
|
"loss": 0.6364, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9775443679826148e-05, |
|
"loss": 0.6372, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.6680414080619812, |
|
"eval_runtime": 50.2211, |
|
"eval_samples_per_second": 39.824, |
|
"eval_steps_per_second": 0.637, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9340818544005794e-05, |
|
"loss": 0.6336, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8906193408185438e-05, |
|
"loss": 0.6348, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8471568272365084e-05, |
|
"loss": 0.6338, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8036943136544728e-05, |
|
"loss": 0.6396, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7602318000724374e-05, |
|
"loss": 0.641, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7167692864904017e-05, |
|
"loss": 0.6369, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6733067729083664e-05, |
|
"loss": 0.6345, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.629844259326331e-05, |
|
"loss": 0.649, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5863817457442954e-05, |
|
"loss": 0.6409, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.54291923216226e-05, |
|
"loss": 0.63, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 0.6678950190544128, |
|
"eval_runtime": 50.1908, |
|
"eval_samples_per_second": 39.848, |
|
"eval_steps_per_second": 0.638, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4994567185802244e-05, |
|
"loss": 0.6428, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4559942049981889e-05, |
|
"loss": 0.645, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4125316914161534e-05, |
|
"loss": 0.6434, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.369069177834118e-05, |
|
"loss": 0.6462, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3256066642520825e-05, |
|
"loss": 0.6387, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2821441506700468e-05, |
|
"loss": 0.6311, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2386816370880113e-05, |
|
"loss": 0.6446, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.195219123505976e-05, |
|
"loss": 0.6426, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1517566099239405e-05, |
|
"loss": 0.6369, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.108294096341905e-05, |
|
"loss": 0.6467, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.6676326990127563, |
|
"eval_runtime": 50.1589, |
|
"eval_samples_per_second": 39.873, |
|
"eval_steps_per_second": 0.638, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0648315827598697e-05, |
|
"loss": 0.6347, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.021369069177834e-05, |
|
"loss": 0.6364, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.779065555957985e-06, |
|
"loss": 0.6309, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.34444042013763e-06, |
|
"loss": 0.6407, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.909815284317276e-06, |
|
"loss": 0.6389, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.475190148496921e-06, |
|
"loss": 0.6378, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.040565012676566e-06, |
|
"loss": 0.6359, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.60593987685621e-06, |
|
"loss": 0.6282, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.171314741035856e-06, |
|
"loss": 0.6409, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.736689605215501e-06, |
|
"loss": 0.6339, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.6675477027893066, |
|
"eval_runtime": 50.3638, |
|
"eval_samples_per_second": 39.711, |
|
"eval_steps_per_second": 0.635, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.302064469395146e-06, |
|
"loss": 0.6306, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.867439333574791e-06, |
|
"loss": 0.6438, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.432814197754437e-06, |
|
"loss": 0.6372, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.9981890619340815e-06, |
|
"loss": 0.6373, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.5635639261137265e-06, |
|
"loss": 0.6441, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.128938790293371e-06, |
|
"loss": 0.6486, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.6943136544730164e-06, |
|
"loss": 0.6359, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.259688518652662e-06, |
|
"loss": 0.6401, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.825063382832307e-06, |
|
"loss": 0.6493, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.390438247011952e-06, |
|
"loss": 0.6368, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 0.6671983599662781, |
|
"eval_runtime": 50.179, |
|
"eval_samples_per_second": 39.857, |
|
"eval_steps_per_second": 0.638, |
|
"step": 13800 |
|
} |
|
], |
|
"max_steps": 13905, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.7542324274428117e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|