|
{ |
|
"best_metric": 2.3293075561523438, |
|
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloomfirefly/checkpoint-19000", |
|
"epoch": 2.952531261381571, |
|
"global_step": 19000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.9733, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 2.7809, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 2.6052, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 2.4925, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0003, |
|
"loss": 2.458, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029968758135902107, |
|
"loss": 2.4281, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029937516271804216, |
|
"loss": 2.4178, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029906274407706326, |
|
"loss": 2.3839, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002987503254360843, |
|
"loss": 2.3521, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029843790679510545, |
|
"loss": 2.338, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.510117292404175, |
|
"eval_runtime": 69.1765, |
|
"eval_samples_per_second": 28.912, |
|
"eval_steps_per_second": 1.807, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002981254881541265, |
|
"loss": 2.3401, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002978130695131476, |
|
"loss": 2.3665, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002975006508721687, |
|
"loss": 2.3691, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002971882322311898, |
|
"loss": 2.3514, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002968758135902109, |
|
"loss": 2.3203, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029656339494923197, |
|
"loss": 2.3393, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000296250976308253, |
|
"loss": 2.3289, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029593855766727416, |
|
"loss": 2.3407, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002956261390262952, |
|
"loss": 2.3163, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002953137203853163, |
|
"loss": 2.3212, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.473245620727539, |
|
"eval_runtime": 69.0219, |
|
"eval_samples_per_second": 28.976, |
|
"eval_steps_per_second": 1.811, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002950013017443374, |
|
"loss": 2.2927, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002946888831033585, |
|
"loss": 2.2927, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002943764644623796, |
|
"loss": 2.29, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002940640458214007, |
|
"loss": 2.3099, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002937516271804217, |
|
"loss": 2.3286, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002934392085394428, |
|
"loss": 2.2928, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002931267898984639, |
|
"loss": 2.2956, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000292814371257485, |
|
"loss": 2.2627, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002925019526165061, |
|
"loss": 2.2897, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002921895339755272, |
|
"loss": 2.2994, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.455402374267578, |
|
"eval_runtime": 69.1315, |
|
"eval_samples_per_second": 28.93, |
|
"eval_steps_per_second": 1.808, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029187711533454824, |
|
"loss": 2.3232, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002915646966935694, |
|
"loss": 2.2515, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029125227805259043, |
|
"loss": 2.2856, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002909398594116115, |
|
"loss": 2.252, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002906274407706326, |
|
"loss": 2.2891, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002903150221296537, |
|
"loss": 2.2769, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002900026034886748, |
|
"loss": 2.2763, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002896901848476959, |
|
"loss": 2.278, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00028937776620671695, |
|
"loss": 2.3126, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002890653475657381, |
|
"loss": 2.2698, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.4434444904327393, |
|
"eval_runtime": 69.7211, |
|
"eval_samples_per_second": 28.686, |
|
"eval_steps_per_second": 1.793, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028875292892475914, |
|
"loss": 2.2587, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028844051028378023, |
|
"loss": 2.2954, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028812809164280133, |
|
"loss": 2.3102, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002878156730018224, |
|
"loss": 2.2918, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002875032543608435, |
|
"loss": 2.2698, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002871908357198646, |
|
"loss": 2.2514, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028687841707888566, |
|
"loss": 2.2684, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028656599843790675, |
|
"loss": 2.2833, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028625357979692785, |
|
"loss": 2.2709, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028594116115594894, |
|
"loss": 2.2596, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.436037302017212, |
|
"eval_runtime": 69.727, |
|
"eval_samples_per_second": 28.683, |
|
"eval_steps_per_second": 1.793, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028562874251497004, |
|
"loss": 2.2743, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028531632387399113, |
|
"loss": 2.23, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002850039052330122, |
|
"loss": 2.2723, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002846914865920333, |
|
"loss": 2.2585, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028437906795105437, |
|
"loss": 2.2463, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028406664931007546, |
|
"loss": 2.2264, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028375423066909656, |
|
"loss": 2.223, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028344181202811765, |
|
"loss": 2.2412, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028312939338713875, |
|
"loss": 2.2714, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028281697474615984, |
|
"loss": 2.2638, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.4272871017456055, |
|
"eval_runtime": 69.3748, |
|
"eval_samples_per_second": 28.829, |
|
"eval_steps_per_second": 1.802, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002825045561051809, |
|
"loss": 2.2303, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000282192137464202, |
|
"loss": 2.2491, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028187971882322313, |
|
"loss": 2.2598, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028156730018224417, |
|
"loss": 2.2566, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028125488154126527, |
|
"loss": 2.2642, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028094246290028636, |
|
"loss": 2.2976, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028063004425930746, |
|
"loss": 2.2144, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028031762561832855, |
|
"loss": 2.2618, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028000520697734965, |
|
"loss": 2.2232, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002796927883363707, |
|
"loss": 2.2349, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.422177314758301, |
|
"eval_runtime": 69.7796, |
|
"eval_samples_per_second": 28.662, |
|
"eval_steps_per_second": 1.791, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027938036969539184, |
|
"loss": 2.2655, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002790679510544129, |
|
"loss": 2.265, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000278755532413434, |
|
"loss": 2.2552, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027844311377245507, |
|
"loss": 2.252, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027813069513147617, |
|
"loss": 2.255, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027781827649049726, |
|
"loss": 2.1869, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027750585784951836, |
|
"loss": 2.2601, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002771934392085394, |
|
"loss": 2.2607, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002768810205675605, |
|
"loss": 2.2245, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002765686019265816, |
|
"loss": 2.2561, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.4173202514648438, |
|
"eval_runtime": 69.7813, |
|
"eval_samples_per_second": 28.661, |
|
"eval_steps_per_second": 1.791, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002762561832856027, |
|
"loss": 2.2472, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002759437646446238, |
|
"loss": 2.2952, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002756313460036449, |
|
"loss": 2.1941, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002753189273626659, |
|
"loss": 2.2396, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027500650872168707, |
|
"loss": 2.2325, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002746940900807081, |
|
"loss": 2.2458, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002743816714397292, |
|
"loss": 2.2464, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002740692527987503, |
|
"loss": 2.2487, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002737568341577714, |
|
"loss": 2.2609, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002734444155167925, |
|
"loss": 2.3016, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.4146716594696045, |
|
"eval_runtime": 69.513, |
|
"eval_samples_per_second": 28.772, |
|
"eval_steps_per_second": 1.798, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002731319968758136, |
|
"loss": 2.2415, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002728195782348346, |
|
"loss": 2.2512, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002725071595938558, |
|
"loss": 2.2186, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002721947409528768, |
|
"loss": 2.1982, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002718823223118979, |
|
"loss": 2.2358, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000271569903670919, |
|
"loss": 2.2359, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002712574850299401, |
|
"loss": 2.2367, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002709450663889612, |
|
"loss": 2.2209, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002706326477479823, |
|
"loss": 2.2026, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027032022910700333, |
|
"loss": 2.2302, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.4096806049346924, |
|
"eval_runtime": 69.8744, |
|
"eval_samples_per_second": 28.623, |
|
"eval_steps_per_second": 1.789, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027000781046602443, |
|
"loss": 2.2516, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002696953918250455, |
|
"loss": 2.2173, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002693829731840666, |
|
"loss": 2.2414, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002690705545430877, |
|
"loss": 2.1922, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002687581359021088, |
|
"loss": 2.2396, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00026844571726112985, |
|
"loss": 2.2602, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000268133298620151, |
|
"loss": 2.2263, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026782087997917204, |
|
"loss": 2.2082, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026750846133819314, |
|
"loss": 2.2144, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026719604269721423, |
|
"loss": 2.2066, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.4065375328063965, |
|
"eval_runtime": 69.933, |
|
"eval_samples_per_second": 28.599, |
|
"eval_steps_per_second": 1.787, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026688362405623533, |
|
"loss": 2.2494, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002665712054152564, |
|
"loss": 2.2471, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002662587867742775, |
|
"loss": 2.2512, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026594636813329856, |
|
"loss": 2.2249, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002656339494923197, |
|
"loss": 2.2526, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026532153085134075, |
|
"loss": 2.2375, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026500911221036185, |
|
"loss": 2.169, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026469669356938294, |
|
"loss": 2.2206, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026438427492840404, |
|
"loss": 2.2284, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026407185628742513, |
|
"loss": 2.2116, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.402400255203247, |
|
"eval_runtime": 70.6508, |
|
"eval_samples_per_second": 28.308, |
|
"eval_steps_per_second": 1.769, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026375943764644623, |
|
"loss": 2.2228, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002634470190054673, |
|
"loss": 2.2264, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026313460036448837, |
|
"loss": 2.2212, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002628221817235095, |
|
"loss": 2.2164, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026250976308253056, |
|
"loss": 2.2523, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026219734444155165, |
|
"loss": 2.2272, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026188492580057275, |
|
"loss": 2.2381, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026157250715959384, |
|
"loss": 2.2149, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026126008851861494, |
|
"loss": 2.228, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026094766987763603, |
|
"loss": 2.2145, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.399576425552368, |
|
"eval_runtime": 69.9194, |
|
"eval_samples_per_second": 28.604, |
|
"eval_steps_per_second": 1.788, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002606352512366571, |
|
"loss": 2.18, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002603228325956782, |
|
"loss": 2.1965, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026001041395469927, |
|
"loss": 2.178, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025969799531372036, |
|
"loss": 2.194, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025938557667274146, |
|
"loss": 2.2024, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025907315803176255, |
|
"loss": 2.2427, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00025876073939078365, |
|
"loss": 2.2246, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00025844832074980474, |
|
"loss": 2.2169, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002581359021088258, |
|
"loss": 2.2154, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002578234834678469, |
|
"loss": 2.1732, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.3982491493225098, |
|
"eval_runtime": 70.2191, |
|
"eval_samples_per_second": 28.482, |
|
"eval_steps_per_second": 1.78, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000257511064826868, |
|
"loss": 2.1951, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025719864618588907, |
|
"loss": 2.2139, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025688622754491017, |
|
"loss": 2.197, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025657380890393126, |
|
"loss": 2.2317, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002562613902629523, |
|
"loss": 2.2107, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025594897162197345, |
|
"loss": 2.2087, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002556365529809945, |
|
"loss": 2.2124, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002553241343400156, |
|
"loss": 2.1762, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002550117156990367, |
|
"loss": 2.2488, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002546992970580578, |
|
"loss": 2.2316, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.394296646118164, |
|
"eval_runtime": 70.2494, |
|
"eval_samples_per_second": 28.47, |
|
"eval_steps_per_second": 1.779, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002543868784170789, |
|
"loss": 2.2386, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025407445977609997, |
|
"loss": 2.224, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000253762041135121, |
|
"loss": 2.2479, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002534496224941421, |
|
"loss": 2.2396, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002531372038531632, |
|
"loss": 2.2405, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002528247852121843, |
|
"loss": 2.1969, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002525123665712054, |
|
"loss": 2.2095, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002521999479302265, |
|
"loss": 2.2202, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002518875292892476, |
|
"loss": 2.2088, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002515751106482687, |
|
"loss": 2.2075, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 2.3918581008911133, |
|
"eval_runtime": 69.2896, |
|
"eval_samples_per_second": 28.864, |
|
"eval_steps_per_second": 1.804, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002512626920072897, |
|
"loss": 2.1993, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002509502733663108, |
|
"loss": 2.2406, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002506378547253319, |
|
"loss": 2.2352, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.000250325436084353, |
|
"loss": 2.236, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002500130174433741, |
|
"loss": 2.1805, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002497005988023952, |
|
"loss": 2.2249, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00024938818016141624, |
|
"loss": 2.2153, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002490757615204374, |
|
"loss": 2.2115, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024876334287945843, |
|
"loss": 2.2284, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002484509242384795, |
|
"loss": 2.184, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 2.3887791633605957, |
|
"eval_runtime": 69.2387, |
|
"eval_samples_per_second": 28.886, |
|
"eval_steps_per_second": 1.805, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002481385055975006, |
|
"loss": 2.2172, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002478260869565217, |
|
"loss": 2.2347, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002475136683155428, |
|
"loss": 2.2213, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002472012496745639, |
|
"loss": 2.2215, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024688883103358495, |
|
"loss": 2.2058, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024657641239260604, |
|
"loss": 2.1918, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002462639937516272, |
|
"loss": 2.2021, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024595157511064824, |
|
"loss": 2.1832, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024563915646966933, |
|
"loss": 2.2199, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002453267378286904, |
|
"loss": 2.1997, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.386540412902832, |
|
"eval_runtime": 69.2123, |
|
"eval_samples_per_second": 28.897, |
|
"eval_steps_per_second": 1.806, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0002450143191877115, |
|
"loss": 2.2009, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002447019005467326, |
|
"loss": 2.2045, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002443894819057537, |
|
"loss": 2.2231, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024407706326477478, |
|
"loss": 2.211, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024376464462379588, |
|
"loss": 2.1904, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024345222598281694, |
|
"loss": 2.1492, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024313980734183807, |
|
"loss": 2.2368, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024282738870085914, |
|
"loss": 2.1753, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024251497005988023, |
|
"loss": 2.179, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002422025514189013, |
|
"loss": 2.1811, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.3864212036132812, |
|
"eval_runtime": 69.2951, |
|
"eval_samples_per_second": 28.862, |
|
"eval_steps_per_second": 1.804, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002418901327779224, |
|
"loss": 2.1496, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002415777141369435, |
|
"loss": 2.2071, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024126529549596459, |
|
"loss": 2.189, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024095287685498565, |
|
"loss": 2.1838, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024064045821400675, |
|
"loss": 2.2292, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024032803957302782, |
|
"loss": 2.1931, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024001562093204894, |
|
"loss": 2.2293, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023970320229107, |
|
"loss": 2.2112, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002393907836500911, |
|
"loss": 2.1479, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023907836500911217, |
|
"loss": 2.1661, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 2.383505344390869, |
|
"eval_runtime": 69.2876, |
|
"eval_samples_per_second": 28.865, |
|
"eval_steps_per_second": 1.804, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002387659463681333, |
|
"loss": 2.1783, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023845352772715436, |
|
"loss": 2.1975, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023814110908617546, |
|
"loss": 2.2268, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023782869044519653, |
|
"loss": 2.1815, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023751627180421765, |
|
"loss": 2.2305, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023720385316323872, |
|
"loss": 2.2087, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002368914345222598, |
|
"loss": 2.2204, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023657901588128088, |
|
"loss": 2.2138, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.000236266597240302, |
|
"loss": 2.2071, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023595417859932307, |
|
"loss": 2.1728, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.3820013999938965, |
|
"eval_runtime": 69.3049, |
|
"eval_samples_per_second": 28.858, |
|
"eval_steps_per_second": 1.804, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023564175995834417, |
|
"loss": 2.182, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023532934131736524, |
|
"loss": 2.1948, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023501692267638633, |
|
"loss": 2.2178, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023470450403540743, |
|
"loss": 2.1979, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023439208539442852, |
|
"loss": 2.222, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002340796667534496, |
|
"loss": 2.221, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023376724811247069, |
|
"loss": 2.208, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023345482947149175, |
|
"loss": 2.1502, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023314241083051288, |
|
"loss": 2.1628, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023282999218953395, |
|
"loss": 2.1933, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 2.380128860473633, |
|
"eval_runtime": 69.2864, |
|
"eval_samples_per_second": 28.866, |
|
"eval_steps_per_second": 1.804, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023251757354855504, |
|
"loss": 2.2204, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002322051549075761, |
|
"loss": 2.218, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023189273626659723, |
|
"loss": 2.199, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002315803176256183, |
|
"loss": 2.1826, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002312678989846394, |
|
"loss": 2.174, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023095548034366046, |
|
"loss": 2.2011, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023064306170268159, |
|
"loss": 2.1951, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023033064306170265, |
|
"loss": 2.2189, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023001822442072375, |
|
"loss": 2.1891, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00022970580577974482, |
|
"loss": 2.1873, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.379713296890259, |
|
"eval_runtime": 69.3005, |
|
"eval_samples_per_second": 28.86, |
|
"eval_steps_per_second": 1.804, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022939338713876591, |
|
"loss": 2.2191, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.000229080968497787, |
|
"loss": 2.1966, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002287685498568081, |
|
"loss": 2.2062, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022845613121582917, |
|
"loss": 2.1888, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022814371257485027, |
|
"loss": 2.1938, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002278312939338714, |
|
"loss": 2.206, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022751887529289246, |
|
"loss": 2.1584, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022720645665191355, |
|
"loss": 2.1933, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022689403801093462, |
|
"loss": 2.2087, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022658161936995575, |
|
"loss": 2.2239, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 2.3774757385253906, |
|
"eval_runtime": 69.3137, |
|
"eval_samples_per_second": 28.854, |
|
"eval_steps_per_second": 1.803, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022626920072897681, |
|
"loss": 2.2136, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002259567820879979, |
|
"loss": 2.2046, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022564436344701898, |
|
"loss": 2.2031, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002253319448060401, |
|
"loss": 2.171, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022501952616506117, |
|
"loss": 2.2101, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022470710752408226, |
|
"loss": 2.1306, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022439468888310333, |
|
"loss": 2.1754, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022408227024212443, |
|
"loss": 2.1972, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022376985160114552, |
|
"loss": 2.2175, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022345743296016662, |
|
"loss": 2.139, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.3760337829589844, |
|
"eval_runtime": 69.3092, |
|
"eval_samples_per_second": 28.856, |
|
"eval_steps_per_second": 1.804, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002231450143191877, |
|
"loss": 2.1912, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022283259567820878, |
|
"loss": 2.2036, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022252017703722985, |
|
"loss": 2.1852, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022220775839625097, |
|
"loss": 2.1672, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022189533975527204, |
|
"loss": 2.1828, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022158292111429314, |
|
"loss": 2.1875, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002212705024733142, |
|
"loss": 2.1997, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022095808383233533, |
|
"loss": 2.2162, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002206456651913564, |
|
"loss": 2.2213, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002203332465503775, |
|
"loss": 2.1972, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.374734878540039, |
|
"eval_runtime": 69.2582, |
|
"eval_samples_per_second": 28.877, |
|
"eval_steps_per_second": 1.805, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022002082790939856, |
|
"loss": 2.175, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00021970840926841968, |
|
"loss": 2.1951, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021939599062744075, |
|
"loss": 2.1493, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021908357198646185, |
|
"loss": 2.1611, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021877115334548291, |
|
"loss": 2.1621, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021845873470450404, |
|
"loss": 2.1875, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002181463160635251, |
|
"loss": 2.1733, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0002178338974225462, |
|
"loss": 2.242, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021752147878156727, |
|
"loss": 2.2154, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021720906014058836, |
|
"loss": 2.1969, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.372680902481079, |
|
"eval_runtime": 69.283, |
|
"eval_samples_per_second": 28.867, |
|
"eval_steps_per_second": 1.804, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021689664149960946, |
|
"loss": 2.1245, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021658422285863056, |
|
"loss": 2.2049, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021627180421765162, |
|
"loss": 2.1716, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021595938557667272, |
|
"loss": 2.1891, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002156469669356938, |
|
"loss": 2.1963, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002153345482947149, |
|
"loss": 2.1946, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021502212965373598, |
|
"loss": 2.1982, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021470971101275707, |
|
"loss": 2.1759, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021439729237177814, |
|
"loss": 2.1661, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021408487373079926, |
|
"loss": 2.2051, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.3719565868377686, |
|
"eval_runtime": 69.321, |
|
"eval_samples_per_second": 28.851, |
|
"eval_steps_per_second": 1.803, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021377245508982033, |
|
"loss": 2.1605, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021346003644884143, |
|
"loss": 2.1375, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002131476178078625, |
|
"loss": 2.1293, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021283519916688362, |
|
"loss": 2.2189, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002125227805259047, |
|
"loss": 2.1784, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021221036188492578, |
|
"loss": 2.1764, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021189794324394685, |
|
"loss": 2.1569, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021158552460296795, |
|
"loss": 2.1704, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021127310596198904, |
|
"loss": 2.1614, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021096068732101014, |
|
"loss": 2.2078, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.370939016342163, |
|
"eval_runtime": 69.2728, |
|
"eval_samples_per_second": 28.871, |
|
"eval_steps_per_second": 1.804, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002106482686800312, |
|
"loss": 2.198, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002103358500390523, |
|
"loss": 2.1735, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021002343139807342, |
|
"loss": 2.1936, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002097110127570945, |
|
"loss": 2.1559, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002093985941161156, |
|
"loss": 2.1856, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020908617547513666, |
|
"loss": 2.194, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020877375683415778, |
|
"loss": 2.1983, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020846133819317885, |
|
"loss": 2.1788, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020814891955219994, |
|
"loss": 2.2126, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.000207836500911221, |
|
"loss": 2.1454, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 2.369137763977051, |
|
"eval_runtime": 69.3036, |
|
"eval_samples_per_second": 28.859, |
|
"eval_steps_per_second": 1.804, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020752408227024213, |
|
"loss": 2.1603, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002072116636292632, |
|
"loss": 2.2075, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002068992449882843, |
|
"loss": 2.1817, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020658682634730537, |
|
"loss": 2.1917, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020627440770632646, |
|
"loss": 2.1727, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020596198906534756, |
|
"loss": 2.1985, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020564957042436865, |
|
"loss": 2.1888, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020533715178338972, |
|
"loss": 2.1425, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020502473314241082, |
|
"loss": 2.1659, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020471231450143188, |
|
"loss": 2.1768, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 2.368589162826538, |
|
"eval_runtime": 69.4033, |
|
"eval_samples_per_second": 28.817, |
|
"eval_steps_per_second": 1.801, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.000204399895860453, |
|
"loss": 2.1744, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020408747721947407, |
|
"loss": 2.1484, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020377505857849517, |
|
"loss": 2.2154, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020346263993751624, |
|
"loss": 2.1358, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020315022129653736, |
|
"loss": 2.1809, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020283780265555843, |
|
"loss": 2.1813, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020252538401457952, |
|
"loss": 2.1903, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002022129653736006, |
|
"loss": 2.1971, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020190054673262172, |
|
"loss": 2.2041, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020158812809164278, |
|
"loss": 2.2169, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.3672330379486084, |
|
"eval_runtime": 69.3516, |
|
"eval_samples_per_second": 28.839, |
|
"eval_steps_per_second": 1.802, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020127570945066388, |
|
"loss": 2.2101, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020096329080968495, |
|
"loss": 2.1739, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020065087216870604, |
|
"loss": 2.1764, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020033845352772714, |
|
"loss": 2.1718, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020002603488674823, |
|
"loss": 2.1688, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0001997136162457693, |
|
"loss": 2.1322, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001994011976047904, |
|
"loss": 2.1593, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001990887789638115, |
|
"loss": 2.179, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001987763603228326, |
|
"loss": 2.139, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019846394168185366, |
|
"loss": 2.1594, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.367051839828491, |
|
"eval_runtime": 69.3473, |
|
"eval_samples_per_second": 28.84, |
|
"eval_steps_per_second": 1.803, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019815152304087475, |
|
"loss": 2.2033, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019783910439989582, |
|
"loss": 2.183, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019752668575891694, |
|
"loss": 2.1517, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.000197214267117938, |
|
"loss": 2.183, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001969018484769591, |
|
"loss": 2.197, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019658942983598017, |
|
"loss": 2.1778, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001962770111950013, |
|
"loss": 2.1745, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019596459255402237, |
|
"loss": 2.1585, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019565217391304346, |
|
"loss": 2.1708, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019533975527206453, |
|
"loss": 2.1649, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.363710880279541, |
|
"eval_runtime": 69.2642, |
|
"eval_samples_per_second": 28.875, |
|
"eval_steps_per_second": 1.805, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019502733663108565, |
|
"loss": 2.1391, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019471491799010672, |
|
"loss": 2.1939, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019440249934912782, |
|
"loss": 2.1558, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019409008070814888, |
|
"loss": 2.173, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019377766206716998, |
|
"loss": 2.1821, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019346524342619107, |
|
"loss": 2.16, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019315282478521217, |
|
"loss": 2.1808, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019284040614423324, |
|
"loss": 2.1355, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019252798750325433, |
|
"loss": 2.1813, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019221556886227546, |
|
"loss": 2.1677, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.3648109436035156, |
|
"eval_runtime": 69.3675, |
|
"eval_samples_per_second": 28.832, |
|
"eval_steps_per_second": 1.802, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019190315022129652, |
|
"loss": 2.1479, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019159073158031762, |
|
"loss": 2.1852, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001912783129393387, |
|
"loss": 2.14, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001909658942983598, |
|
"loss": 2.1332, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019065347565738088, |
|
"loss": 2.178, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019034105701640197, |
|
"loss": 2.1661, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019002863837542304, |
|
"loss": 2.1902, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018971621973444417, |
|
"loss": 2.1775, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018940380109346523, |
|
"loss": 2.2007, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018909138245248633, |
|
"loss": 2.2078, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.3642289638519287, |
|
"eval_runtime": 69.5476, |
|
"eval_samples_per_second": 28.757, |
|
"eval_steps_per_second": 1.797, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001887789638115074, |
|
"loss": 2.185, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001884665451705285, |
|
"loss": 2.1856, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001881541265295496, |
|
"loss": 2.2049, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018784170788857068, |
|
"loss": 2.1376, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018752928924759175, |
|
"loss": 2.1693, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018721687060661285, |
|
"loss": 2.1825, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018690445196563392, |
|
"loss": 2.1649, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018659203332465504, |
|
"loss": 2.1936, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001862796146836761, |
|
"loss": 2.143, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001859671960426972, |
|
"loss": 2.1617, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.362150192260742, |
|
"eval_runtime": 69.3218, |
|
"eval_samples_per_second": 28.851, |
|
"eval_steps_per_second": 1.803, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018565477740171827, |
|
"loss": 2.1555, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001853423587607394, |
|
"loss": 2.1639, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018502994011976046, |
|
"loss": 2.1678, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018471752147878156, |
|
"loss": 2.1775, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018440510283780263, |
|
"loss": 2.1784, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018409268419682375, |
|
"loss": 2.1499, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018378026555584482, |
|
"loss": 2.154, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001834678469148659, |
|
"loss": 2.1793, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018315542827388698, |
|
"loss": 2.2292, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018284300963290808, |
|
"loss": 2.1578, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.3628857135772705, |
|
"eval_runtime": 69.2564, |
|
"eval_samples_per_second": 28.878, |
|
"eval_steps_per_second": 1.805, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018253059099192917, |
|
"loss": 2.1494, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018221817235095027, |
|
"loss": 2.1669, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018190575370997133, |
|
"loss": 2.1447, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018159333506899243, |
|
"loss": 2.1663, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001812809164280135, |
|
"loss": 2.1871, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018096849778703462, |
|
"loss": 2.1338, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001806560791460557, |
|
"loss": 2.1767, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018034366050507678, |
|
"loss": 2.1694, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018003124186409785, |
|
"loss": 2.1674, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017971882322311898, |
|
"loss": 2.1863, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.3613035678863525, |
|
"eval_runtime": 69.2881, |
|
"eval_samples_per_second": 28.865, |
|
"eval_steps_per_second": 1.804, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017940640458214004, |
|
"loss": 2.1441, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017909398594116114, |
|
"loss": 2.1885, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001787815673001822, |
|
"loss": 2.1514, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017846914865920333, |
|
"loss": 2.2002, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001781567300182244, |
|
"loss": 2.1759, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001778443113772455, |
|
"loss": 2.1611, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017753189273626656, |
|
"loss": 2.1667, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017721947409528768, |
|
"loss": 2.1717, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017690705545430875, |
|
"loss": 2.1983, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017659463681332985, |
|
"loss": 2.2092, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.3608274459838867, |
|
"eval_runtime": 69.3364, |
|
"eval_samples_per_second": 28.845, |
|
"eval_steps_per_second": 1.803, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017628221817235092, |
|
"loss": 2.1305, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.000175969799531372, |
|
"loss": 2.1431, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001756573808903931, |
|
"loss": 2.1384, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001753449622494142, |
|
"loss": 2.2093, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017503254360843527, |
|
"loss": 2.1271, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017472012496745637, |
|
"loss": 2.1466, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001744077063264775, |
|
"loss": 2.1578, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017409528768549856, |
|
"loss": 2.1632, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017378286904451965, |
|
"loss": 2.1465, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017347045040354072, |
|
"loss": 2.2226, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 2.35835599899292, |
|
"eval_runtime": 69.2657, |
|
"eval_samples_per_second": 28.874, |
|
"eval_steps_per_second": 1.805, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017315803176256184, |
|
"loss": 2.1585, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001728456131215829, |
|
"loss": 2.1529, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.000172533194480604, |
|
"loss": 2.1663, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017222077583962508, |
|
"loss": 2.1422, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017190835719864617, |
|
"loss": 2.158, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017159593855766727, |
|
"loss": 2.1984, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017128351991668836, |
|
"loss": 2.1395, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017097110127570943, |
|
"loss": 2.14, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017065868263473053, |
|
"loss": 2.1657, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017036188492580056, |
|
"loss": 2.167, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.35697603225708, |
|
"eval_runtime": 69.2685, |
|
"eval_samples_per_second": 28.873, |
|
"eval_steps_per_second": 1.805, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017004946628482165, |
|
"loss": 2.1396, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016973704764384272, |
|
"loss": 2.1777, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016942462900286384, |
|
"loss": 2.1366, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001691122103618849, |
|
"loss": 2.1625, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.000168799791720906, |
|
"loss": 2.1859, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016848737307992707, |
|
"loss": 2.1705, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001681749544389482, |
|
"loss": 2.1971, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016786253579796927, |
|
"loss": 2.1937, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016755011715699036, |
|
"loss": 2.1436, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016723769851601143, |
|
"loss": 2.1592, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 2.3576247692108154, |
|
"eval_runtime": 69.277, |
|
"eval_samples_per_second": 28.87, |
|
"eval_steps_per_second": 1.804, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016692527987503252, |
|
"loss": 2.1745, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016661286123405362, |
|
"loss": 2.1517, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016630044259307472, |
|
"loss": 2.1921, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016598802395209578, |
|
"loss": 2.1703, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016567560531111688, |
|
"loss": 2.1223, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016536318667013795, |
|
"loss": 2.1748, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016505076802915907, |
|
"loss": 2.145, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016473834938818014, |
|
"loss": 2.1077, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016442593074720123, |
|
"loss": 2.1571, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001641135121062223, |
|
"loss": 2.1946, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 2.3559648990631104, |
|
"eval_runtime": 69.3886, |
|
"eval_samples_per_second": 28.823, |
|
"eval_steps_per_second": 1.801, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016380109346524342, |
|
"loss": 2.1635, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001634886748242645, |
|
"loss": 2.1546, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001631762561832856, |
|
"loss": 2.1359, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016286383754230666, |
|
"loss": 2.1741, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016255141890132778, |
|
"loss": 2.1382, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016223900026034885, |
|
"loss": 2.1514, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016192658161936994, |
|
"loss": 2.17, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.000161614162978391, |
|
"loss": 2.1784, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001613017443374121, |
|
"loss": 2.1869, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001609893256964332, |
|
"loss": 2.155, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 2.3562612533569336, |
|
"eval_runtime": 70.7208, |
|
"eval_samples_per_second": 28.28, |
|
"eval_steps_per_second": 1.768, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001606769070554543, |
|
"loss": 2.1467, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016036448841447537, |
|
"loss": 2.1662, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016005206977349646, |
|
"loss": 2.1928, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015973965113251756, |
|
"loss": 2.1084, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015942723249153865, |
|
"loss": 2.182, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015911481385055975, |
|
"loss": 2.1502, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015880239520958082, |
|
"loss": 2.1645, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015848997656860194, |
|
"loss": 2.1246, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.000158177557927623, |
|
"loss": 2.1769, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001578651392866441, |
|
"loss": 2.1772, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 2.354128360748291, |
|
"eval_runtime": 70.4883, |
|
"eval_samples_per_second": 28.374, |
|
"eval_steps_per_second": 1.773, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015755272064566517, |
|
"loss": 2.1777, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001572403020046863, |
|
"loss": 2.1749, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015692788336370736, |
|
"loss": 2.1861, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015661546472272846, |
|
"loss": 2.1567, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015630304608174952, |
|
"loss": 2.1426, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015599062744077062, |
|
"loss": 2.1658, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015567820879979172, |
|
"loss": 2.1639, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001553657901588128, |
|
"loss": 2.1897, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015505337151783388, |
|
"loss": 2.1439, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015474095287685497, |
|
"loss": 2.1326, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.352673292160034, |
|
"eval_runtime": 69.2871, |
|
"eval_samples_per_second": 28.865, |
|
"eval_steps_per_second": 1.804, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015442853423587604, |
|
"loss": 2.139, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015411611559489717, |
|
"loss": 2.1087, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015380369695391823, |
|
"loss": 2.1528, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015349127831293933, |
|
"loss": 2.1866, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001531788596719604, |
|
"loss": 2.1436, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015286644103098152, |
|
"loss": 2.1699, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001525540223900026, |
|
"loss": 2.1415, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015224160374902368, |
|
"loss": 2.1092, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015192918510804475, |
|
"loss": 2.1422, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015161676646706587, |
|
"loss": 2.1677, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 2.3518292903900146, |
|
"eval_runtime": 69.3029, |
|
"eval_samples_per_second": 28.859, |
|
"eval_steps_per_second": 1.804, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015130434782608694, |
|
"loss": 2.1594, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015099192918510804, |
|
"loss": 2.1539, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001506795105441291, |
|
"loss": 2.1343, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015036709190315023, |
|
"loss": 2.1386, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001500546732621713, |
|
"loss": 2.1512, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001497422546211924, |
|
"loss": 2.1669, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001494298359802135, |
|
"loss": 2.158, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014911741733923456, |
|
"loss": 2.1643, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014880499869825565, |
|
"loss": 2.1612, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014849258005727675, |
|
"loss": 2.1441, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.35211181640625, |
|
"eval_runtime": 69.2821, |
|
"eval_samples_per_second": 28.867, |
|
"eval_steps_per_second": 1.804, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014818016141629784, |
|
"loss": 2.1704, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001478677427753189, |
|
"loss": 2.1546, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014755532413434, |
|
"loss": 2.1909, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001472429054933611, |
|
"loss": 2.149, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014693048685238217, |
|
"loss": 2.1419, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014661806821140327, |
|
"loss": 2.1465, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014630564957042436, |
|
"loss": 2.1551, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014599323092944546, |
|
"loss": 2.1526, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014568081228846653, |
|
"loss": 2.1437, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014536839364748762, |
|
"loss": 2.1659, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.3507654666900635, |
|
"eval_runtime": 69.2997, |
|
"eval_samples_per_second": 28.86, |
|
"eval_steps_per_second": 1.804, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014505597500650872, |
|
"loss": 2.14, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001447435563655298, |
|
"loss": 2.1289, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014443113772455088, |
|
"loss": 2.1226, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014411871908357198, |
|
"loss": 2.1627, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014380630044259307, |
|
"loss": 2.1759, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014349388180161414, |
|
"loss": 2.1511, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014318146316063523, |
|
"loss": 2.1275, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014286904451965633, |
|
"loss": 2.1638, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014255662587867743, |
|
"loss": 2.1494, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001422442072376985, |
|
"loss": 2.1554, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.349271059036255, |
|
"eval_runtime": 69.2627, |
|
"eval_samples_per_second": 28.876, |
|
"eval_steps_per_second": 1.805, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001419317885967196, |
|
"loss": 2.133, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014161936995574068, |
|
"loss": 2.1515, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014130695131476178, |
|
"loss": 2.1262, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014099453267378285, |
|
"loss": 2.142, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014068211403280394, |
|
"loss": 2.1578, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014036969539182504, |
|
"loss": 2.1583, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001400572767508461, |
|
"loss": 2.1043, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001397448581098672, |
|
"loss": 2.1539, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001394324394688883, |
|
"loss": 2.1189, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001391200208279094, |
|
"loss": 2.1484, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.3479487895965576, |
|
"eval_runtime": 69.2625, |
|
"eval_samples_per_second": 28.876, |
|
"eval_steps_per_second": 1.805, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013880760218693046, |
|
"loss": 2.1993, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013849518354595156, |
|
"loss": 2.1869, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013818276490497265, |
|
"loss": 2.1644, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013787034626399375, |
|
"loss": 2.1751, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013755792762301482, |
|
"loss": 2.1416, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001372455089820359, |
|
"loss": 2.1809, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.000136933090341057, |
|
"loss": 2.1653, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013662067170007808, |
|
"loss": 2.1026, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013630825305909917, |
|
"loss": 2.1503, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013599583441812027, |
|
"loss": 2.1289, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.3468515872955322, |
|
"eval_runtime": 69.2274, |
|
"eval_samples_per_second": 28.89, |
|
"eval_steps_per_second": 1.806, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013568341577714136, |
|
"loss": 2.1929, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013537099713616243, |
|
"loss": 2.1547, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013505857849518353, |
|
"loss": 2.1571, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013474615985420462, |
|
"loss": 2.1649, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013443374121322572, |
|
"loss": 2.1647, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013412132257224679, |
|
"loss": 2.206, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013380890393126788, |
|
"loss": 2.1377, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013349648529028898, |
|
"loss": 2.1347, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013318406664931004, |
|
"loss": 2.1948, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013287164800833114, |
|
"loss": 2.1844, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.347837209701538, |
|
"eval_runtime": 69.2425, |
|
"eval_samples_per_second": 28.884, |
|
"eval_steps_per_second": 1.805, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013255922936735224, |
|
"loss": 2.1515, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013224681072637333, |
|
"loss": 2.1885, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013193439208539443, |
|
"loss": 2.143, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013162197344441552, |
|
"loss": 2.1671, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0001313095548034366, |
|
"loss": 2.1426, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013099713616245769, |
|
"loss": 2.1653, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013068471752147878, |
|
"loss": 2.1774, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013037229888049988, |
|
"loss": 2.1344, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013005988023952094, |
|
"loss": 2.1217, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012974746159854204, |
|
"loss": 2.1281, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 2.345808982849121, |
|
"eval_runtime": 69.2499, |
|
"eval_samples_per_second": 28.881, |
|
"eval_steps_per_second": 1.805, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012943504295756314, |
|
"loss": 2.1459, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001291226243165842, |
|
"loss": 2.1294, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001288102056756053, |
|
"loss": 2.1455, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001284977870346264, |
|
"loss": 2.1219, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001281853683936475, |
|
"loss": 2.1696, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012787294975266856, |
|
"loss": 2.1474, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012756053111168965, |
|
"loss": 2.1436, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012724811247071075, |
|
"loss": 2.1785, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012693569382973184, |
|
"loss": 2.1677, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001266232751887529, |
|
"loss": 2.1564, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 2.3451294898986816, |
|
"eval_runtime": 69.2454, |
|
"eval_samples_per_second": 28.883, |
|
"eval_steps_per_second": 1.805, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.000126310856547774, |
|
"loss": 2.1793, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001259984379067951, |
|
"loss": 2.1583, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012568601926581617, |
|
"loss": 2.1482, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012537360062483727, |
|
"loss": 2.1393, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012506118198385836, |
|
"loss": 2.1586, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012474876334287946, |
|
"loss": 2.1533, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012443634470190053, |
|
"loss": 2.1516, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012412392606092162, |
|
"loss": 2.1184, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012381150741994272, |
|
"loss": 2.1162, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001234990887789638, |
|
"loss": 2.1588, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.3451669216156006, |
|
"eval_runtime": 69.2383, |
|
"eval_samples_per_second": 28.886, |
|
"eval_steps_per_second": 1.805, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012318667013798488, |
|
"loss": 2.1588, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012287425149700598, |
|
"loss": 2.1463, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012256183285602707, |
|
"loss": 2.1498, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012224941421504814, |
|
"loss": 2.1663, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012193699557406924, |
|
"loss": 2.1306, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012162457693309033, |
|
"loss": 2.1542, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012131215829211141, |
|
"loss": 2.1513, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012099973965113251, |
|
"loss": 2.2031, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012068732101015359, |
|
"loss": 2.1438, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012037490236917469, |
|
"loss": 2.1431, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.3447554111480713, |
|
"eval_runtime": 69.2865, |
|
"eval_samples_per_second": 28.866, |
|
"eval_steps_per_second": 1.804, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012006248372819577, |
|
"loss": 2.1272, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011975006508721686, |
|
"loss": 2.1584, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011943764644623794, |
|
"loss": 2.128, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011912522780525903, |
|
"loss": 2.1461, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011881280916428012, |
|
"loss": 2.1411, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001185003905233012, |
|
"loss": 2.1592, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001181879718823223, |
|
"loss": 2.1642, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011787555324134338, |
|
"loss": 2.1914, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011756313460036448, |
|
"loss": 2.1612, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011725071595938556, |
|
"loss": 2.1452, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 2.3442630767822266, |
|
"eval_runtime": 69.2459, |
|
"eval_samples_per_second": 28.883, |
|
"eval_steps_per_second": 1.805, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011693829731840665, |
|
"loss": 2.1453, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011662587867742774, |
|
"loss": 2.1251, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011631346003644882, |
|
"loss": 2.1412, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011600104139546991, |
|
"loss": 2.1033, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.000115688622754491, |
|
"loss": 2.1219, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011537620411351209, |
|
"loss": 2.1831, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011506378547253317, |
|
"loss": 2.1434, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011475136683155427, |
|
"loss": 2.1439, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011443894819057536, |
|
"loss": 2.1377, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011412652954959646, |
|
"loss": 2.1345, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.342855453491211, |
|
"eval_runtime": 69.2714, |
|
"eval_samples_per_second": 28.872, |
|
"eval_steps_per_second": 1.804, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011381411090861754, |
|
"loss": 2.1527, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011350169226763864, |
|
"loss": 2.1737, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011318927362665972, |
|
"loss": 2.137, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011287685498568081, |
|
"loss": 2.1616, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001125644363447019, |
|
"loss": 2.1688, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011225201770372299, |
|
"loss": 2.1746, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011193959906274407, |
|
"loss": 2.1552, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011162718042176515, |
|
"loss": 2.1643, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011131476178078625, |
|
"loss": 2.1494, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011100234313980733, |
|
"loss": 2.1112, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.34304141998291, |
|
"eval_runtime": 72.1422, |
|
"eval_samples_per_second": 27.723, |
|
"eval_steps_per_second": 1.733, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011068992449882843, |
|
"loss": 2.1505, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011037750585784951, |
|
"loss": 2.1722, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0001100650872168706, |
|
"loss": 2.1582, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010975266857589169, |
|
"loss": 2.1806, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010944024993491278, |
|
"loss": 2.1508, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010912783129393386, |
|
"loss": 2.1654, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010881541265295496, |
|
"loss": 2.131, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010850299401197604, |
|
"loss": 2.1301, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010819057537099712, |
|
"loss": 2.1312, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010787815673001822, |
|
"loss": 2.1301, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.3404922485351562, |
|
"eval_runtime": 71.3367, |
|
"eval_samples_per_second": 28.036, |
|
"eval_steps_per_second": 1.752, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010758135902108825, |
|
"loss": 2.1398, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010726894038010933, |
|
"loss": 2.1449, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010695652173913043, |
|
"loss": 2.1498, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010664410309815151, |
|
"loss": 2.1484, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001063316844571726, |
|
"loss": 2.1705, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010601926581619368, |
|
"loss": 2.1236, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010570684717521478, |
|
"loss": 2.1435, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010539442853423586, |
|
"loss": 2.1656, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010508200989325696, |
|
"loss": 2.1459, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010476959125227804, |
|
"loss": 2.1392, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.3410892486572266, |
|
"eval_runtime": 72.1407, |
|
"eval_samples_per_second": 27.724, |
|
"eval_steps_per_second": 1.733, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010445717261129913, |
|
"loss": 2.1399, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010414475397032022, |
|
"loss": 2.1979, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001038323353293413, |
|
"loss": 2.1596, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001035199166883624, |
|
"loss": 2.1817, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010320749804738348, |
|
"loss": 2.0972, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010289507940640457, |
|
"loss": 2.1293, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010258266076542565, |
|
"loss": 2.1362, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010227024212444675, |
|
"loss": 2.1474, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010195782348346783, |
|
"loss": 2.2004, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010164540484248893, |
|
"loss": 2.1221, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.340029716491699, |
|
"eval_runtime": 72.0796, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 1.734, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010133298620151001, |
|
"loss": 2.1782, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010102056756053109, |
|
"loss": 2.1358, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010070814891955218, |
|
"loss": 2.122, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010039573027857327, |
|
"loss": 2.1494, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010008331163759436, |
|
"loss": 2.1522, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.977089299661544e-05, |
|
"loss": 2.1241, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.945847435563654e-05, |
|
"loss": 2.1456, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.914605571465763e-05, |
|
"loss": 2.1495, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.883363707367873e-05, |
|
"loss": 2.1734, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.852121843269981e-05, |
|
"loss": 2.1711, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.339312791824341, |
|
"eval_runtime": 69.2994, |
|
"eval_samples_per_second": 28.86, |
|
"eval_steps_per_second": 1.804, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.820879979172091e-05, |
|
"loss": 2.1483, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.789638115074199e-05, |
|
"loss": 2.124, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.758396250976308e-05, |
|
"loss": 2.1337, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.727154386878417e-05, |
|
"loss": 2.137, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.695912522780526e-05, |
|
"loss": 2.1225, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.664670658682634e-05, |
|
"loss": 2.1384, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.633428794584743e-05, |
|
"loss": 2.1052, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.602186930486852e-05, |
|
"loss": 2.1489, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.57094506638896e-05, |
|
"loss": 2.1154, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.53970320229107e-05, |
|
"loss": 2.1476, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.3396096229553223, |
|
"eval_runtime": 69.2833, |
|
"eval_samples_per_second": 28.867, |
|
"eval_steps_per_second": 1.804, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.508461338193178e-05, |
|
"loss": 2.1109, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.477219474095288e-05, |
|
"loss": 2.0973, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.445977609997396e-05, |
|
"loss": 2.1281, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.414735745899505e-05, |
|
"loss": 2.1216, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.383493881801614e-05, |
|
"loss": 2.1323, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.352252017703723e-05, |
|
"loss": 2.1477, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.321010153605831e-05, |
|
"loss": 2.1309, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.28976828950794e-05, |
|
"loss": 2.0899, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.258526425410049e-05, |
|
"loss": 2.1402, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.227284561312157e-05, |
|
"loss": 2.0768, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 2.3376858234405518, |
|
"eval_runtime": 69.4568, |
|
"eval_samples_per_second": 28.795, |
|
"eval_steps_per_second": 1.8, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.196042697214267e-05, |
|
"loss": 2.1405, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.164800833116375e-05, |
|
"loss": 2.1118, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.133558969018484e-05, |
|
"loss": 2.1525, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.102317104920593e-05, |
|
"loss": 2.1369, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.071075240822702e-05, |
|
"loss": 2.1683, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.03983337672481e-05, |
|
"loss": 2.1193, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.00859151262692e-05, |
|
"loss": 2.1222, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.977349648529028e-05, |
|
"loss": 2.1461, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.946107784431136e-05, |
|
"loss": 2.1106, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.914865920333246e-05, |
|
"loss": 2.1307, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.3381118774414062, |
|
"eval_runtime": 69.5609, |
|
"eval_samples_per_second": 28.752, |
|
"eval_steps_per_second": 1.797, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.883624056235354e-05, |
|
"loss": 2.1679, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.852382192137464e-05, |
|
"loss": 2.1418, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.821140328039572e-05, |
|
"loss": 2.1238, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.789898463941681e-05, |
|
"loss": 2.0995, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.75865659984379e-05, |
|
"loss": 2.1596, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.727414735745899e-05, |
|
"loss": 2.1478, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.696172871648007e-05, |
|
"loss": 2.1299, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.664931007550115e-05, |
|
"loss": 2.1405, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.633689143452225e-05, |
|
"loss": 2.174, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.602447279354333e-05, |
|
"loss": 2.129, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 2.337769031524658, |
|
"eval_runtime": 69.7472, |
|
"eval_samples_per_second": 28.675, |
|
"eval_steps_per_second": 1.792, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.571205415256443e-05, |
|
"loss": 2.1368, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.539963551158551e-05, |
|
"loss": 2.1573, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.50872168706066e-05, |
|
"loss": 2.1132, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.477479822962769e-05, |
|
"loss": 2.1131, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.446237958864878e-05, |
|
"loss": 2.1351, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.414996094766986e-05, |
|
"loss": 2.1738, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.383754230669096e-05, |
|
"loss": 2.1551, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.352512366571204e-05, |
|
"loss": 2.1195, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.321270502473312e-05, |
|
"loss": 2.1125, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.290028638375422e-05, |
|
"loss": 2.1549, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 2.337301731109619, |
|
"eval_runtime": 69.7462, |
|
"eval_samples_per_second": 28.675, |
|
"eval_steps_per_second": 1.792, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.25878677427753e-05, |
|
"loss": 2.1573, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.22754491017964e-05, |
|
"loss": 2.1125, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.196303046081748e-05, |
|
"loss": 2.161, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.165061181983857e-05, |
|
"loss": 2.1511, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.133819317885967e-05, |
|
"loss": 2.1737, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.102577453788076e-05, |
|
"loss": 2.1158, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.071335589690184e-05, |
|
"loss": 2.1398, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.040093725592294e-05, |
|
"loss": 2.1183, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.008851861494402e-05, |
|
"loss": 2.1295, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.977609997396512e-05, |
|
"loss": 2.1416, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 2.336796760559082, |
|
"eval_runtime": 69.3578, |
|
"eval_samples_per_second": 28.836, |
|
"eval_steps_per_second": 1.802, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.94636813329862e-05, |
|
"loss": 2.1461, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.91512626920073e-05, |
|
"loss": 2.0931, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.883884405102838e-05, |
|
"loss": 2.1341, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.852642541004946e-05, |
|
"loss": 2.1369, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.821400676907055e-05, |
|
"loss": 2.1431, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.790158812809164e-05, |
|
"loss": 2.1508, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.758916948711273e-05, |
|
"loss": 2.1456, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.727675084613381e-05, |
|
"loss": 2.1448, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.696433220515491e-05, |
|
"loss": 2.1637, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.665191356417599e-05, |
|
"loss": 2.114, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 2.3362655639648438, |
|
"eval_runtime": 69.5792, |
|
"eval_samples_per_second": 28.744, |
|
"eval_steps_per_second": 1.797, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.633949492319709e-05, |
|
"loss": 2.1222, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.602707628221817e-05, |
|
"loss": 2.1776, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.57302785732882e-05, |
|
"loss": 2.1414, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.541785993230929e-05, |
|
"loss": 2.1231, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.510544129133038e-05, |
|
"loss": 2.1345, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.479302265035147e-05, |
|
"loss": 2.1339, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.448060400937255e-05, |
|
"loss": 2.1562, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.416818536839363e-05, |
|
"loss": 2.1649, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.385576672741473e-05, |
|
"loss": 2.1339, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.354334808643581e-05, |
|
"loss": 2.1347, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 2.335818290710449, |
|
"eval_runtime": 69.5131, |
|
"eval_samples_per_second": 28.772, |
|
"eval_steps_per_second": 1.798, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.323092944545691e-05, |
|
"loss": 2.1078, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.291851080447799e-05, |
|
"loss": 2.1446, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.260609216349908e-05, |
|
"loss": 2.1076, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.229367352252017e-05, |
|
"loss": 2.1548, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.198125488154126e-05, |
|
"loss": 2.1317, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.166883624056234e-05, |
|
"loss": 2.0991, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.135641759958343e-05, |
|
"loss": 2.1507, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.104399895860452e-05, |
|
"loss": 2.1173, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.073158031762562e-05, |
|
"loss": 2.104, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.043478260869565e-05, |
|
"loss": 2.1118, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 2.334048271179199, |
|
"eval_runtime": 69.3816, |
|
"eval_samples_per_second": 28.826, |
|
"eval_steps_per_second": 1.802, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.012236396771674e-05, |
|
"loss": 2.0738, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.980994532673782e-05, |
|
"loss": 2.1221, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.94975266857589e-05, |
|
"loss": 2.1531, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.918510804478e-05, |
|
"loss": 2.1318, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.887268940380108e-05, |
|
"loss": 2.1251, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.856027076282218e-05, |
|
"loss": 2.1212, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.824785212184326e-05, |
|
"loss": 2.0927, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.793543348086436e-05, |
|
"loss": 2.1277, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.762301483988544e-05, |
|
"loss": 2.156, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.731059619890653e-05, |
|
"loss": 2.1276, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.3340351581573486, |
|
"eval_runtime": 69.2926, |
|
"eval_samples_per_second": 28.863, |
|
"eval_steps_per_second": 1.804, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.699817755792761e-05, |
|
"loss": 2.1313, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.668575891694871e-05, |
|
"loss": 2.1452, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.637334027596979e-05, |
|
"loss": 2.1148, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.606092163499087e-05, |
|
"loss": 2.1193, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.574850299401197e-05, |
|
"loss": 2.1672, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.543608435303305e-05, |
|
"loss": 2.0789, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.512366571205415e-05, |
|
"loss": 2.1438, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.481124707107523e-05, |
|
"loss": 2.1597, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.449882843009632e-05, |
|
"loss": 2.11, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.418640978911742e-05, |
|
"loss": 2.1279, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.3344008922576904, |
|
"eval_runtime": 69.3363, |
|
"eval_samples_per_second": 28.845, |
|
"eval_steps_per_second": 1.803, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.38739911481385e-05, |
|
"loss": 2.1459, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.35615725071596e-05, |
|
"loss": 2.1702, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.324915386618068e-05, |
|
"loss": 2.1262, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.293673522520177e-05, |
|
"loss": 2.0988, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.262431658422286e-05, |
|
"loss": 2.1224, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.231189794324394e-05, |
|
"loss": 2.1102, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.199947930226503e-05, |
|
"loss": 2.1168, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.168706066128611e-05, |
|
"loss": 2.1205, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.137464202030721e-05, |
|
"loss": 2.0855, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.106222337932829e-05, |
|
"loss": 2.1548, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.333451271057129, |
|
"eval_runtime": 69.3334, |
|
"eval_samples_per_second": 28.846, |
|
"eval_steps_per_second": 1.803, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.074980473834938e-05, |
|
"loss": 2.1433, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.043738609737047e-05, |
|
"loss": 2.123, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.012496745639156e-05, |
|
"loss": 2.0965, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9812548815412647e-05, |
|
"loss": 2.1498, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9500130174433735e-05, |
|
"loss": 2.1456, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.9187711533454824e-05, |
|
"loss": 2.1295, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.887529289247591e-05, |
|
"loss": 2.108, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8562874251497e-05, |
|
"loss": 2.1592, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.825045561051809e-05, |
|
"loss": 2.1214, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.793803696953918e-05, |
|
"loss": 2.1561, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 2.3329403400421143, |
|
"eval_runtime": 69.6034, |
|
"eval_samples_per_second": 28.734, |
|
"eval_steps_per_second": 1.796, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.762561832856026e-05, |
|
"loss": 2.1382, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.731319968758135e-05, |
|
"loss": 2.109, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.700078104660244e-05, |
|
"loss": 2.1283, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6688362405623526e-05, |
|
"loss": 2.15, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6375943764644615e-05, |
|
"loss": 2.1125, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6063525123665704e-05, |
|
"loss": 2.1709, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.575110648268679e-05, |
|
"loss": 2.1622, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.543868784170789e-05, |
|
"loss": 2.0769, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.5126269200728976e-05, |
|
"loss": 2.137, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.4813850559750065e-05, |
|
"loss": 2.1294, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.3324475288391113, |
|
"eval_runtime": 69.559, |
|
"eval_samples_per_second": 28.753, |
|
"eval_steps_per_second": 1.797, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.4501431918771154e-05, |
|
"loss": 2.1425, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.418901327779224e-05, |
|
"loss": 2.128, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.387659463681333e-05, |
|
"loss": 2.1553, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.356417599583441e-05, |
|
"loss": 2.1339, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.32517573548555e-05, |
|
"loss": 2.1536, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.293933871387659e-05, |
|
"loss": 2.1669, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.262692007289768e-05, |
|
"loss": 2.122, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.231450143191877e-05, |
|
"loss": 2.1435, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.2002082790939856e-05, |
|
"loss": 2.1406, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1689664149960945e-05, |
|
"loss": 2.1174, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.332836866378784, |
|
"eval_runtime": 69.3739, |
|
"eval_samples_per_second": 28.829, |
|
"eval_steps_per_second": 1.802, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.137724550898203e-05, |
|
"loss": 2.1286, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.106482686800312e-05, |
|
"loss": 2.1343, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.075240822702421e-05, |
|
"loss": 2.1134, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.043998958604529e-05, |
|
"loss": 2.1633, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.012757094506638e-05, |
|
"loss": 2.1473, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.981515230408747e-05, |
|
"loss": 2.1535, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.950273366310856e-05, |
|
"loss": 2.112, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.919031502212965e-05, |
|
"loss": 2.1399, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.8877896381150736e-05, |
|
"loss": 2.0913, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.8565477740171824e-05, |
|
"loss": 2.1179, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 2.332409143447876, |
|
"eval_runtime": 69.3294, |
|
"eval_samples_per_second": 28.848, |
|
"eval_steps_per_second": 1.803, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.825305909919291e-05, |
|
"loss": 2.1756, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.7940640458214e-05, |
|
"loss": 2.1466, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.762822181723509e-05, |
|
"loss": 2.1443, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.731580317625618e-05, |
|
"loss": 2.1207, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.700338453527726e-05, |
|
"loss": 2.1275, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.669096589429835e-05, |
|
"loss": 2.1305, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6378547253319445e-05, |
|
"loss": 2.134, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6066128612340534e-05, |
|
"loss": 2.1681, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.575370997136162e-05, |
|
"loss": 2.1627, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.544129133038271e-05, |
|
"loss": 2.1421, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.3318614959716797, |
|
"eval_runtime": 69.3251, |
|
"eval_samples_per_second": 28.85, |
|
"eval_steps_per_second": 1.803, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.51288726894038e-05, |
|
"loss": 2.1225, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.481645404842489e-05, |
|
"loss": 2.156, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.450403540744598e-05, |
|
"loss": 2.1573, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4191616766467066e-05, |
|
"loss": 2.1295, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.3879198125488154e-05, |
|
"loss": 2.14, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.356677948450924e-05, |
|
"loss": 2.1046, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.3254360843530325e-05, |
|
"loss": 2.1201, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.2941942202551413e-05, |
|
"loss": 2.1767, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.26295235615725e-05, |
|
"loss": 2.1244, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.231710492059359e-05, |
|
"loss": 2.1301, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.331899881362915, |
|
"eval_runtime": 69.3398, |
|
"eval_samples_per_second": 28.843, |
|
"eval_steps_per_second": 1.803, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.200468627961468e-05, |
|
"loss": 2.1022, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.169226763863577e-05, |
|
"loss": 2.1121, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.137984899765686e-05, |
|
"loss": 2.1014, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.1067430356677945e-05, |
|
"loss": 2.1867, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.0755011715699034e-05, |
|
"loss": 2.1055, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.044259307472012e-05, |
|
"loss": 2.1435, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.013017443374121e-05, |
|
"loss": 2.09, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.981775579276229e-05, |
|
"loss": 2.1317, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.950533715178338e-05, |
|
"loss": 2.0683, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.919291851080447e-05, |
|
"loss": 2.1249, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 2.331566572189331, |
|
"eval_runtime": 69.3154, |
|
"eval_samples_per_second": 28.854, |
|
"eval_steps_per_second": 1.803, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.888049986982556e-05, |
|
"loss": 2.164, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.856808122884665e-05, |
|
"loss": 2.16, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8255662587867736e-05, |
|
"loss": 2.1603, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.7943243946888825e-05, |
|
"loss": 2.1346, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7630825305909914e-05, |
|
"loss": 2.1082, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.7318406664931e-05, |
|
"loss": 2.1014, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.700598802395209e-05, |
|
"loss": 2.1088, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.669356938297318e-05, |
|
"loss": 2.0975, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.638115074199427e-05, |
|
"loss": 2.1212, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.606873210101536e-05, |
|
"loss": 2.1226, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 2.3310983180999756, |
|
"eval_runtime": 69.3945, |
|
"eval_samples_per_second": 28.821, |
|
"eval_steps_per_second": 1.801, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.5756313460036446e-05, |
|
"loss": 2.1318, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.5443894819057534e-05, |
|
"loss": 2.1073, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.513147617807862e-05, |
|
"loss": 2.1411, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.481905753709971e-05, |
|
"loss": 2.0959, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.45066388961208e-05, |
|
"loss": 2.0858, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.419422025514189e-05, |
|
"loss": 2.1174, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.388180161416298e-05, |
|
"loss": 2.1459, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.3569382973184066e-05, |
|
"loss": 2.1425, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.3256964332205155e-05, |
|
"loss": 2.0971, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.2944545691226243e-05, |
|
"loss": 2.1176, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.330962896347046, |
|
"eval_runtime": 69.3407, |
|
"eval_samples_per_second": 28.843, |
|
"eval_steps_per_second": 1.803, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2632127050247325e-05, |
|
"loss": 2.1471, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2319708409268414e-05, |
|
"loss": 2.1064, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.20072897682895e-05, |
|
"loss": 2.1347, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.169487112731059e-05, |
|
"loss": 2.142, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.138245248633168e-05, |
|
"loss": 2.1773, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.107003384535277e-05, |
|
"loss": 2.1489, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.075761520437386e-05, |
|
"loss": 2.1257, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.044519656339495e-05, |
|
"loss": 2.1288, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0132777922416038e-05, |
|
"loss": 2.1258, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9820359281437123e-05, |
|
"loss": 2.1322, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 2.3309593200683594, |
|
"eval_runtime": 69.3923, |
|
"eval_samples_per_second": 28.822, |
|
"eval_steps_per_second": 1.801, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9507940640458212e-05, |
|
"loss": 2.1495, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.91955219994793e-05, |
|
"loss": 2.0843, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.888310335850039e-05, |
|
"loss": 2.11, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8570684717521478e-05, |
|
"loss": 2.1005, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.827388700859151e-05, |
|
"loss": 2.1302, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.79614683676126e-05, |
|
"loss": 2.1086, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7649049726633688e-05, |
|
"loss": 2.1302, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7336631085654777e-05, |
|
"loss": 2.1417, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7024212444675862e-05, |
|
"loss": 2.1369, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.671179380369695e-05, |
|
"loss": 2.1384, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 2.33089017868042, |
|
"eval_runtime": 69.3747, |
|
"eval_samples_per_second": 28.829, |
|
"eval_steps_per_second": 1.802, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.639937516271804e-05, |
|
"loss": 2.1243, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6086956521739128e-05, |
|
"loss": 2.1161, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5774537880760217e-05, |
|
"loss": 2.1051, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5462119239781302e-05, |
|
"loss": 2.0762, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.514970059880239e-05, |
|
"loss": 2.1105, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.483728195782348e-05, |
|
"loss": 2.1535, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.452486331684457e-05, |
|
"loss": 2.1706, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.421244467586566e-05, |
|
"loss": 2.0857, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.390002603488675e-05, |
|
"loss": 2.1553, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3587607393907834e-05, |
|
"loss": 2.0983, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 2.3304569721221924, |
|
"eval_runtime": 69.35, |
|
"eval_samples_per_second": 28.839, |
|
"eval_steps_per_second": 1.802, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3275188752928923e-05, |
|
"loss": 2.1212, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.296277011195001e-05, |
|
"loss": 2.0816, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.26503514709711e-05, |
|
"loss": 2.0935, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.233793282999219e-05, |
|
"loss": 2.1576, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2025514189013274e-05, |
|
"loss": 2.1076, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1713095548034362e-05, |
|
"loss": 2.1184, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.140067690705545e-05, |
|
"loss": 2.1169, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.108825826607654e-05, |
|
"loss": 2.1442, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.077583962509763e-05, |
|
"loss": 2.1332, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0463420984118717e-05, |
|
"loss": 2.1553, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.330599069595337, |
|
"eval_runtime": 69.346, |
|
"eval_samples_per_second": 28.841, |
|
"eval_steps_per_second": 1.803, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0151002343139802e-05, |
|
"loss": 2.1055, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9838583702160894e-05, |
|
"loss": 2.0778, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9526165061181983e-05, |
|
"loss": 2.143, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.921374642020307e-05, |
|
"loss": 2.0886, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.890132777922416e-05, |
|
"loss": 2.1236, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.858890913824525e-05, |
|
"loss": 2.1307, |
|
"step": 18120 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8276490497266334e-05, |
|
"loss": 2.1192, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7964071856287423e-05, |
|
"loss": 2.0999, |
|
"step": 18160 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.765165321530851e-05, |
|
"loss": 2.0792, |
|
"step": 18180 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.73392345743296e-05, |
|
"loss": 2.1015, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.330050230026245, |
|
"eval_runtime": 69.3278, |
|
"eval_samples_per_second": 28.848, |
|
"eval_steps_per_second": 1.803, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.702681593335069e-05, |
|
"loss": 2.1226, |
|
"step": 18220 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6714397292371778e-05, |
|
"loss": 2.0924, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6401978651392866e-05, |
|
"loss": 2.1272, |
|
"step": 18260 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6089560010413955e-05, |
|
"loss": 2.1175, |
|
"step": 18280 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.577714136943504e-05, |
|
"loss": 2.1396, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.546472272845613e-05, |
|
"loss": 2.1514, |
|
"step": 18320 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5152304087477217e-05, |
|
"loss": 2.1257, |
|
"step": 18340 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4839885446498306e-05, |
|
"loss": 2.1459, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4527466805519396e-05, |
|
"loss": 2.09, |
|
"step": 18380 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4215048164540483e-05, |
|
"loss": 2.1442, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.330048084259033, |
|
"eval_runtime": 69.2975, |
|
"eval_samples_per_second": 28.861, |
|
"eval_steps_per_second": 1.804, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.3902629523561572e-05, |
|
"loss": 2.1816, |
|
"step": 18420 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3590210882582659e-05, |
|
"loss": 2.0965, |
|
"step": 18440 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3277792241603748e-05, |
|
"loss": 2.1178, |
|
"step": 18460 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2965373600624836e-05, |
|
"loss": 2.1562, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2652954959645923e-05, |
|
"loss": 2.095, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2340536318667012e-05, |
|
"loss": 2.1522, |
|
"step": 18520 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2028117677688102e-05, |
|
"loss": 2.1729, |
|
"step": 18540 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1715699036709189e-05, |
|
"loss": 2.141, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1403280395730278e-05, |
|
"loss": 2.148, |
|
"step": 18580 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1090861754751366e-05, |
|
"loss": 2.1619, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 2.329728603363037, |
|
"eval_runtime": 69.3412, |
|
"eval_samples_per_second": 28.843, |
|
"eval_steps_per_second": 1.803, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0778443113772453e-05, |
|
"loss": 2.1199, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0466024472793542e-05, |
|
"loss": 2.131, |
|
"step": 18640 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0153605831814629e-05, |
|
"loss": 2.1512, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.84118719083572e-06, |
|
"loss": 2.1292, |
|
"step": 18680 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.528768549856808e-06, |
|
"loss": 2.0928, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.216349908877897e-06, |
|
"loss": 2.1168, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.903931267898984e-06, |
|
"loss": 2.1316, |
|
"step": 18740 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.591512626920072e-06, |
|
"loss": 2.1198, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.279093985941161e-06, |
|
"loss": 2.1226, |
|
"step": 18780 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.96667534496225e-06, |
|
"loss": 2.1234, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 2.3294034004211426, |
|
"eval_runtime": 69.3303, |
|
"eval_samples_per_second": 28.847, |
|
"eval_steps_per_second": 1.803, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.654256703983337e-06, |
|
"loss": 2.1251, |
|
"step": 18820 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.341838063004425e-06, |
|
"loss": 2.1278, |
|
"step": 18840 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.029419422025514e-06, |
|
"loss": 2.1115, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.717000781046602e-06, |
|
"loss": 2.1468, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.4045821400676894e-06, |
|
"loss": 2.0903, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.092163499088779e-06, |
|
"loss": 2.1271, |
|
"step": 18920 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.779744858109867e-06, |
|
"loss": 2.1253, |
|
"step": 18940 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.4673262171309545e-06, |
|
"loss": 2.0903, |
|
"step": 18960 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.154907576152043e-06, |
|
"loss": 2.1566, |
|
"step": 18980 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.842488935173132e-06, |
|
"loss": 2.1477, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.3293075561523438, |
|
"eval_runtime": 69.6518, |
|
"eval_samples_per_second": 28.714, |
|
"eval_steps_per_second": 1.795, |
|
"step": 19000 |
|
} |
|
], |
|
"max_steps": 19305, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.3158443458154725e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|