|
{ |
|
"best_metric": 3.5347280502319336, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2_90/checkpoint-2080000", |
|
"epoch": 50.52525209995579, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 4.064033031463623, |
|
"eval_runtime": 40.7192, |
|
"eval_samples_per_second": 982.337, |
|
"eval_steps_per_second": 61.396, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 4.2654, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 3.941408634185791, |
|
"eval_runtime": 40.7754, |
|
"eval_samples_per_second": 980.983, |
|
"eval_steps_per_second": 61.311, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 3.8956282138824463, |
|
"eval_runtime": 40.7547, |
|
"eval_samples_per_second": 981.482, |
|
"eval_steps_per_second": 61.343, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 4.0459, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 3.8526694774627686, |
|
"eval_runtime": 40.8002, |
|
"eval_samples_per_second": 980.388, |
|
"eval_steps_per_second": 61.274, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 3.8232262134552, |
|
"eval_runtime": 40.9229, |
|
"eval_samples_per_second": 977.449, |
|
"eval_steps_per_second": 61.091, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.018e-07, |
|
"loss": 3.9781, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 3.7806332111358643, |
|
"eval_runtime": 40.7016, |
|
"eval_samples_per_second": 982.762, |
|
"eval_steps_per_second": 61.423, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 3.7860567569732666, |
|
"eval_runtime": 40.8284, |
|
"eval_samples_per_second": 979.71, |
|
"eval_steps_per_second": 61.232, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 3.9323, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 3.79296875, |
|
"eval_runtime": 40.6905, |
|
"eval_samples_per_second": 983.03, |
|
"eval_steps_per_second": 61.439, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 3.781362533569336, |
|
"eval_runtime": 40.7808, |
|
"eval_samples_per_second": 980.855, |
|
"eval_steps_per_second": 61.303, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 3.9224, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 3.781531572341919, |
|
"eval_runtime": 40.7108, |
|
"eval_samples_per_second": 982.541, |
|
"eval_steps_per_second": 61.409, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 3.7402968406677246, |
|
"eval_runtime": 40.9233, |
|
"eval_samples_per_second": 977.439, |
|
"eval_steps_per_second": 61.09, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.936e-07, |
|
"loss": 3.8924, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 3.7468085289001465, |
|
"eval_runtime": 40.9264, |
|
"eval_samples_per_second": 977.363, |
|
"eval_steps_per_second": 61.085, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 3.740011215209961, |
|
"eval_runtime": 40.805, |
|
"eval_samples_per_second": 980.273, |
|
"eval_steps_per_second": 61.267, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 3.879, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 3.7283473014831543, |
|
"eval_runtime": 41.0237, |
|
"eval_samples_per_second": 975.046, |
|
"eval_steps_per_second": 60.94, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 3.738088369369507, |
|
"eval_runtime": 40.8332, |
|
"eval_samples_per_second": 979.596, |
|
"eval_steps_per_second": 61.225, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 3.8806, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 3.7072582244873047, |
|
"eval_runtime": 40.8641, |
|
"eval_samples_per_second": 978.853, |
|
"eval_steps_per_second": 61.178, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 3.7082958221435547, |
|
"eval_runtime": 40.6946, |
|
"eval_samples_per_second": 982.931, |
|
"eval_steps_per_second": 61.433, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.854e-07, |
|
"loss": 3.8659, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 3.69919490814209, |
|
"eval_runtime": 40.8002, |
|
"eval_samples_per_second": 980.387, |
|
"eval_steps_per_second": 61.274, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 3.695563316345215, |
|
"eval_runtime": 40.8922, |
|
"eval_samples_per_second": 978.182, |
|
"eval_steps_per_second": 61.136, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 3.8634, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 3.674525022506714, |
|
"eval_runtime": 40.9185, |
|
"eval_samples_per_second": 977.552, |
|
"eval_steps_per_second": 61.097, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 3.70168399810791, |
|
"eval_runtime": 40.7392, |
|
"eval_samples_per_second": 981.855, |
|
"eval_steps_per_second": 61.366, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 3.8632, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 3.6960248947143555, |
|
"eval_runtime": 41.3044, |
|
"eval_samples_per_second": 968.42, |
|
"eval_steps_per_second": 60.526, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 3.7202460765838623, |
|
"eval_runtime": 40.9217, |
|
"eval_samples_per_second": 977.476, |
|
"eval_steps_per_second": 61.092, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.772e-07, |
|
"loss": 3.8416, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 3.7108640670776367, |
|
"eval_runtime": 40.749, |
|
"eval_samples_per_second": 981.619, |
|
"eval_steps_per_second": 61.351, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 3.694209098815918, |
|
"eval_runtime": 40.7078, |
|
"eval_samples_per_second": 982.612, |
|
"eval_steps_per_second": 61.413, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 3.8368, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 3.6943516731262207, |
|
"eval_runtime": 40.8183, |
|
"eval_samples_per_second": 979.952, |
|
"eval_steps_per_second": 61.247, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 3.6750781536102295, |
|
"eval_runtime": 40.7333, |
|
"eval_samples_per_second": 981.999, |
|
"eval_steps_per_second": 61.375, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 3.8359, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 3.6815297603607178, |
|
"eval_runtime": 40.6706, |
|
"eval_samples_per_second": 983.512, |
|
"eval_steps_per_second": 61.47, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 3.6914784908294678, |
|
"eval_runtime": 40.9289, |
|
"eval_samples_per_second": 977.304, |
|
"eval_steps_per_second": 61.082, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.69e-07, |
|
"loss": 3.8411, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 3.67960786819458, |
|
"eval_runtime": 40.7871, |
|
"eval_samples_per_second": 980.701, |
|
"eval_steps_per_second": 61.294, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 3.684703826904297, |
|
"eval_runtime": 40.8269, |
|
"eval_samples_per_second": 979.745, |
|
"eval_steps_per_second": 61.234, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 3.8359, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 3.6988320350646973, |
|
"eval_runtime": 40.7811, |
|
"eval_samples_per_second": 980.847, |
|
"eval_steps_per_second": 61.303, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 3.679949998855591, |
|
"eval_runtime": 40.9831, |
|
"eval_samples_per_second": 976.011, |
|
"eval_steps_per_second": 61.001, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 3.8268, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 3.681049108505249, |
|
"eval_runtime": 40.7829, |
|
"eval_samples_per_second": 980.804, |
|
"eval_steps_per_second": 61.3, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 3.66385817527771, |
|
"eval_runtime": 40.8484, |
|
"eval_samples_per_second": 979.23, |
|
"eval_steps_per_second": 61.202, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 3.608e-07, |
|
"loss": 3.8172, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 3.666334390640259, |
|
"eval_runtime": 41.3519, |
|
"eval_samples_per_second": 967.307, |
|
"eval_steps_per_second": 60.457, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 3.6838061809539795, |
|
"eval_runtime": 40.9071, |
|
"eval_samples_per_second": 977.825, |
|
"eval_steps_per_second": 61.114, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 3.8263, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 3.6755523681640625, |
|
"eval_runtime": 41.0373, |
|
"eval_samples_per_second": 974.723, |
|
"eval_steps_per_second": 60.92, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 3.650726556777954, |
|
"eval_runtime": 40.8546, |
|
"eval_samples_per_second": 979.082, |
|
"eval_steps_per_second": 61.193, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 3.8215, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 3.640876054763794, |
|
"eval_runtime": 40.7809, |
|
"eval_samples_per_second": 980.851, |
|
"eval_steps_per_second": 61.303, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 3.678966760635376, |
|
"eval_runtime": 41.0811, |
|
"eval_samples_per_second": 973.684, |
|
"eval_steps_per_second": 60.855, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 3.8189, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 3.6679444313049316, |
|
"eval_runtime": 40.9298, |
|
"eval_samples_per_second": 977.283, |
|
"eval_steps_per_second": 61.08, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 3.6443188190460205, |
|
"eval_runtime": 41.2575, |
|
"eval_samples_per_second": 969.52, |
|
"eval_steps_per_second": 60.595, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 3.8155, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 3.658766746520996, |
|
"eval_runtime": 40.7405, |
|
"eval_samples_per_second": 981.824, |
|
"eval_steps_per_second": 61.364, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 3.644803524017334, |
|
"eval_runtime": 40.9906, |
|
"eval_samples_per_second": 975.834, |
|
"eval_steps_per_second": 60.99, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 3.8075, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 3.651991367340088, |
|
"eval_runtime": 40.88, |
|
"eval_samples_per_second": 978.474, |
|
"eval_steps_per_second": 61.155, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 3.654082775115967, |
|
"eval_runtime": 40.8852, |
|
"eval_samples_per_second": 978.348, |
|
"eval_steps_per_second": 61.147, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 3.444e-07, |
|
"loss": 3.8064, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 3.656877279281616, |
|
"eval_runtime": 40.8714, |
|
"eval_samples_per_second": 978.679, |
|
"eval_steps_per_second": 61.167, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 3.6586239337921143, |
|
"eval_runtime": 40.7353, |
|
"eval_samples_per_second": 981.949, |
|
"eval_steps_per_second": 61.372, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 3.8092, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 3.670098066329956, |
|
"eval_runtime": 41.244, |
|
"eval_samples_per_second": 969.839, |
|
"eval_steps_per_second": 60.615, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 3.6543703079223633, |
|
"eval_runtime": 40.8099, |
|
"eval_samples_per_second": 980.155, |
|
"eval_steps_per_second": 61.26, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 3.8032, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 3.6668190956115723, |
|
"eval_runtime": 40.824, |
|
"eval_samples_per_second": 979.816, |
|
"eval_steps_per_second": 61.238, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 3.663094997406006, |
|
"eval_runtime": 40.8379, |
|
"eval_samples_per_second": 979.482, |
|
"eval_steps_per_second": 61.218, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 3.8062, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 3.6481242179870605, |
|
"eval_runtime": 40.8688, |
|
"eval_samples_per_second": 978.741, |
|
"eval_steps_per_second": 61.171, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 3.6392152309417725, |
|
"eval_runtime": 40.7769, |
|
"eval_samples_per_second": 980.949, |
|
"eval_steps_per_second": 61.309, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 3.7987, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 3.6482295989990234, |
|
"eval_runtime": 40.7877, |
|
"eval_samples_per_second": 980.689, |
|
"eval_steps_per_second": 61.293, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 3.635684013366699, |
|
"eval_runtime": 40.7447, |
|
"eval_samples_per_second": 981.723, |
|
"eval_steps_per_second": 61.358, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 3.7954, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 3.633270263671875, |
|
"eval_runtime": 40.9048, |
|
"eval_samples_per_second": 977.881, |
|
"eval_steps_per_second": 61.118, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 3.6653008460998535, |
|
"eval_runtime": 40.6738, |
|
"eval_samples_per_second": 983.433, |
|
"eval_steps_per_second": 61.465, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 3.28e-07, |
|
"loss": 3.7938, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 3.6266889572143555, |
|
"eval_runtime": 40.7255, |
|
"eval_samples_per_second": 982.185, |
|
"eval_steps_per_second": 61.387, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_loss": 3.6489765644073486, |
|
"eval_runtime": 40.8291, |
|
"eval_samples_per_second": 979.693, |
|
"eval_steps_per_second": 61.231, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 3.7901, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"eval_loss": 3.641712188720703, |
|
"eval_runtime": 41.0566, |
|
"eval_samples_per_second": 974.264, |
|
"eval_steps_per_second": 60.892, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"eval_loss": 3.6263108253479004, |
|
"eval_runtime": 40.7345, |
|
"eval_samples_per_second": 981.968, |
|
"eval_steps_per_second": 61.373, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 3.7935, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"eval_loss": 3.6522746086120605, |
|
"eval_runtime": 40.7085, |
|
"eval_samples_per_second": 982.595, |
|
"eval_steps_per_second": 61.412, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_loss": 3.6444039344787598, |
|
"eval_runtime": 40.8183, |
|
"eval_samples_per_second": 979.953, |
|
"eval_steps_per_second": 61.247, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 3.198e-07, |
|
"loss": 3.7951, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"eval_loss": 3.622567892074585, |
|
"eval_runtime": 40.7606, |
|
"eval_samples_per_second": 981.339, |
|
"eval_steps_per_second": 61.334, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"eval_loss": 3.6346988677978516, |
|
"eval_runtime": 40.6211, |
|
"eval_samples_per_second": 984.71, |
|
"eval_steps_per_second": 61.544, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 3.7861, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_loss": 3.637178421020508, |
|
"eval_runtime": 40.7751, |
|
"eval_samples_per_second": 980.991, |
|
"eval_steps_per_second": 61.312, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 11.62, |
|
"eval_loss": 3.6162924766540527, |
|
"eval_runtime": 40.7193, |
|
"eval_samples_per_second": 982.336, |
|
"eval_steps_per_second": 61.396, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 3.7846, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_loss": 3.6299352645874023, |
|
"eval_runtime": 40.8861, |
|
"eval_samples_per_second": 978.327, |
|
"eval_steps_per_second": 61.145, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"eval_loss": 3.6330039501190186, |
|
"eval_runtime": 40.827, |
|
"eval_samples_per_second": 979.744, |
|
"eval_steps_per_second": 61.234, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.116e-07, |
|
"loss": 3.7778, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"eval_loss": 3.637084484100342, |
|
"eval_runtime": 40.7594, |
|
"eval_samples_per_second": 981.368, |
|
"eval_steps_per_second": 61.335, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"eval_loss": 3.634265661239624, |
|
"eval_runtime": 40.9272, |
|
"eval_samples_per_second": 977.346, |
|
"eval_steps_per_second": 61.084, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 3.777, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"eval_loss": 3.62420392036438, |
|
"eval_runtime": 40.8724, |
|
"eval_samples_per_second": 978.655, |
|
"eval_steps_per_second": 61.166, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"eval_loss": 3.6119399070739746, |
|
"eval_runtime": 40.9368, |
|
"eval_samples_per_second": 977.116, |
|
"eval_steps_per_second": 61.07, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 3.778, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 3.6167094707489014, |
|
"eval_runtime": 40.959, |
|
"eval_samples_per_second": 976.587, |
|
"eval_steps_per_second": 61.037, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"eval_loss": 3.6191112995147705, |
|
"eval_runtime": 41.1386, |
|
"eval_samples_per_second": 972.322, |
|
"eval_steps_per_second": 60.77, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 3.034e-07, |
|
"loss": 3.7795, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"eval_loss": 3.622523546218872, |
|
"eval_runtime": 40.9825, |
|
"eval_samples_per_second": 976.027, |
|
"eval_steps_per_second": 61.002, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"eval_loss": 3.605618476867676, |
|
"eval_runtime": 41.1305, |
|
"eval_samples_per_second": 972.515, |
|
"eval_steps_per_second": 60.782, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 3.7766, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"eval_loss": 3.613523483276367, |
|
"eval_runtime": 41.0605, |
|
"eval_samples_per_second": 974.172, |
|
"eval_steps_per_second": 60.886, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"eval_loss": 3.6168975830078125, |
|
"eval_runtime": 41.0389, |
|
"eval_samples_per_second": 974.684, |
|
"eval_steps_per_second": 60.918, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 3.7729, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_loss": 3.6035475730895996, |
|
"eval_runtime": 40.9659, |
|
"eval_samples_per_second": 976.423, |
|
"eval_steps_per_second": 61.026, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_loss": 3.6109204292297363, |
|
"eval_runtime": 40.9828, |
|
"eval_samples_per_second": 976.02, |
|
"eval_steps_per_second": 61.001, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 2.952e-07, |
|
"loss": 3.7846, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 3.617966413497925, |
|
"eval_runtime": 40.9537, |
|
"eval_samples_per_second": 976.714, |
|
"eval_steps_per_second": 61.045, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"eval_loss": 3.6171200275421143, |
|
"eval_runtime": 40.9901, |
|
"eval_samples_per_second": 975.845, |
|
"eval_steps_per_second": 60.99, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 3.7726, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 3.6182472705841064, |
|
"eval_runtime": 41.0924, |
|
"eval_samples_per_second": 973.416, |
|
"eval_steps_per_second": 60.838, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"eval_loss": 3.608560085296631, |
|
"eval_runtime": 40.9759, |
|
"eval_samples_per_second": 976.185, |
|
"eval_steps_per_second": 61.012, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 3.7717, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"eval_loss": 3.585219621658325, |
|
"eval_runtime": 41.0174, |
|
"eval_samples_per_second": 975.197, |
|
"eval_steps_per_second": 60.95, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_loss": 3.5882859230041504, |
|
"eval_runtime": 40.9445, |
|
"eval_samples_per_second": 976.932, |
|
"eval_steps_per_second": 61.058, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 3.7713, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"eval_loss": 3.605559825897217, |
|
"eval_runtime": 40.8496, |
|
"eval_samples_per_second": 979.203, |
|
"eval_steps_per_second": 61.2, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"eval_loss": 3.6003661155700684, |
|
"eval_runtime": 40.9885, |
|
"eval_samples_per_second": 975.883, |
|
"eval_steps_per_second": 60.993, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 3.7745, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"eval_loss": 3.605851888656616, |
|
"eval_runtime": 40.9895, |
|
"eval_samples_per_second": 975.86, |
|
"eval_steps_per_second": 60.991, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 3.615556240081787, |
|
"eval_runtime": 41.0234, |
|
"eval_samples_per_second": 975.053, |
|
"eval_steps_per_second": 60.941, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 3.7557, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"eval_loss": 3.6028969287872314, |
|
"eval_runtime": 41.0782, |
|
"eval_samples_per_second": 973.753, |
|
"eval_steps_per_second": 60.86, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 3.609947681427002, |
|
"eval_runtime": 41.5445, |
|
"eval_samples_per_second": 962.823, |
|
"eval_steps_per_second": 60.176, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 3.7628, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_loss": 3.601588249206543, |
|
"eval_runtime": 41.0762, |
|
"eval_samples_per_second": 973.8, |
|
"eval_steps_per_second": 60.863, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"eval_loss": 3.6008121967315674, |
|
"eval_runtime": 41.0463, |
|
"eval_samples_per_second": 974.508, |
|
"eval_steps_per_second": 60.907, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 3.7717, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"eval_loss": 3.597151517868042, |
|
"eval_runtime": 40.9425, |
|
"eval_samples_per_second": 976.979, |
|
"eval_steps_per_second": 61.061, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"eval_loss": 3.5838093757629395, |
|
"eval_runtime": 40.8764, |
|
"eval_samples_per_second": 978.56, |
|
"eval_steps_per_second": 61.16, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 3.7616, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"eval_loss": 3.5868074893951416, |
|
"eval_runtime": 40.7672, |
|
"eval_samples_per_second": 981.181, |
|
"eval_steps_per_second": 61.324, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"eval_loss": 3.5833964347839355, |
|
"eval_runtime": 40.8806, |
|
"eval_samples_per_second": 978.459, |
|
"eval_steps_per_second": 61.154, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 2.706e-07, |
|
"loss": 3.7608, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"eval_loss": 3.6065828800201416, |
|
"eval_runtime": 40.6947, |
|
"eval_samples_per_second": 982.928, |
|
"eval_steps_per_second": 61.433, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"eval_loss": 3.591146469116211, |
|
"eval_runtime": 40.6695, |
|
"eval_samples_per_second": 983.539, |
|
"eval_steps_per_second": 61.471, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 3.7625, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"eval_loss": 3.5996882915496826, |
|
"eval_runtime": 40.5695, |
|
"eval_samples_per_second": 985.963, |
|
"eval_steps_per_second": 61.623, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_loss": 3.585501194000244, |
|
"eval_runtime": 40.7388, |
|
"eval_samples_per_second": 981.864, |
|
"eval_steps_per_second": 61.366, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 3.7634, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_loss": 3.5861265659332275, |
|
"eval_runtime": 40.647, |
|
"eval_samples_per_second": 984.082, |
|
"eval_steps_per_second": 61.505, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_loss": 3.6021432876586914, |
|
"eval_runtime": 40.6989, |
|
"eval_samples_per_second": 982.827, |
|
"eval_steps_per_second": 61.427, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 2.624e-07, |
|
"loss": 3.75, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"eval_loss": 3.59663987159729, |
|
"eval_runtime": 40.7681, |
|
"eval_samples_per_second": 981.16, |
|
"eval_steps_per_second": 61.322, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"eval_loss": 3.5761475563049316, |
|
"eval_runtime": 40.7529, |
|
"eval_samples_per_second": 981.526, |
|
"eval_steps_per_second": 61.345, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 3.7492, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"eval_loss": 3.5757410526275635, |
|
"eval_runtime": 40.651, |
|
"eval_samples_per_second": 983.986, |
|
"eval_steps_per_second": 61.499, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 3.6122772693634033, |
|
"eval_runtime": 40.9703, |
|
"eval_samples_per_second": 976.316, |
|
"eval_steps_per_second": 61.02, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 3.7522, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"eval_loss": 3.584080934524536, |
|
"eval_runtime": 40.6825, |
|
"eval_samples_per_second": 983.223, |
|
"eval_steps_per_second": 61.451, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_loss": 3.5830750465393066, |
|
"eval_runtime": 40.7667, |
|
"eval_samples_per_second": 981.193, |
|
"eval_steps_per_second": 61.325, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 2.542e-07, |
|
"loss": 3.7482, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 3.5859768390655518, |
|
"eval_runtime": 40.7841, |
|
"eval_samples_per_second": 980.776, |
|
"eval_steps_per_second": 61.298, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 19.37, |
|
"eval_loss": 3.580415964126587, |
|
"eval_runtime": 40.6458, |
|
"eval_samples_per_second": 984.112, |
|
"eval_steps_per_second": 61.507, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 3.75, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"eval_loss": 3.573030471801758, |
|
"eval_runtime": 40.6766, |
|
"eval_samples_per_second": 983.365, |
|
"eval_steps_per_second": 61.46, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"eval_loss": 3.595536708831787, |
|
"eval_runtime": 40.7802, |
|
"eval_samples_per_second": 980.869, |
|
"eval_steps_per_second": 61.304, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 3.755, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 19.87, |
|
"eval_loss": 3.58677077293396, |
|
"eval_runtime": 40.6611, |
|
"eval_samples_per_second": 983.74, |
|
"eval_steps_per_second": 61.484, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"eval_loss": 3.5992047786712646, |
|
"eval_runtime": 40.7082, |
|
"eval_samples_per_second": 982.602, |
|
"eval_steps_per_second": 61.413, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 2.46e-07, |
|
"loss": 3.7549, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"eval_loss": 3.565748929977417, |
|
"eval_runtime": 40.7168, |
|
"eval_samples_per_second": 982.395, |
|
"eval_steps_per_second": 61.4, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"eval_loss": 3.5780434608459473, |
|
"eval_runtime": 40.7077, |
|
"eval_samples_per_second": 982.614, |
|
"eval_steps_per_second": 61.413, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 3.743, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_loss": 3.5827953815460205, |
|
"eval_runtime": 41.0389, |
|
"eval_samples_per_second": 974.684, |
|
"eval_steps_per_second": 60.918, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"eval_loss": 3.5676393508911133, |
|
"eval_runtime": 40.6587, |
|
"eval_samples_per_second": 983.798, |
|
"eval_steps_per_second": 61.487, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 3.75, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 20.88, |
|
"eval_loss": 3.5723633766174316, |
|
"eval_runtime": 40.664, |
|
"eval_samples_per_second": 983.671, |
|
"eval_steps_per_second": 61.479, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"eval_loss": 3.5849897861480713, |
|
"eval_runtime": 40.9978, |
|
"eval_samples_per_second": 975.663, |
|
"eval_steps_per_second": 60.979, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 3.7483, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"eval_loss": 3.5873172283172607, |
|
"eval_runtime": 40.6772, |
|
"eval_samples_per_second": 983.351, |
|
"eval_steps_per_second": 61.459, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"eval_loss": 3.579916000366211, |
|
"eval_runtime": 40.7474, |
|
"eval_samples_per_second": 981.657, |
|
"eval_steps_per_second": 61.354, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 3.7523, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"eval_loss": 3.5973777770996094, |
|
"eval_runtime": 40.6799, |
|
"eval_samples_per_second": 983.285, |
|
"eval_steps_per_second": 61.455, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"eval_loss": 3.5790483951568604, |
|
"eval_runtime": 40.6889, |
|
"eval_samples_per_second": 983.069, |
|
"eval_steps_per_second": 61.442, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 3.7458, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"eval_loss": 3.5884082317352295, |
|
"eval_runtime": 40.7901, |
|
"eval_samples_per_second": 980.63, |
|
"eval_steps_per_second": 61.289, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 22.06, |
|
"eval_loss": 3.590412139892578, |
|
"eval_runtime": 40.7466, |
|
"eval_samples_per_second": 981.678, |
|
"eval_steps_per_second": 61.355, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 3.7498, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"eval_loss": 3.5850796699523926, |
|
"eval_runtime": 40.7799, |
|
"eval_samples_per_second": 980.876, |
|
"eval_steps_per_second": 61.305, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_loss": 3.5775773525238037, |
|
"eval_runtime": 40.7308, |
|
"eval_samples_per_second": 982.058, |
|
"eval_steps_per_second": 61.379, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 3.7496, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"eval_loss": 3.5685038566589355, |
|
"eval_runtime": 40.7046, |
|
"eval_samples_per_second": 982.691, |
|
"eval_steps_per_second": 61.418, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 22.74, |
|
"eval_loss": 3.5731077194213867, |
|
"eval_runtime": 40.7653, |
|
"eval_samples_per_second": 981.226, |
|
"eval_steps_per_second": 61.327, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 3.7395, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"eval_loss": 3.5858407020568848, |
|
"eval_runtime": 40.7416, |
|
"eval_samples_per_second": 981.799, |
|
"eval_steps_per_second": 61.362, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 23.07, |
|
"eval_loss": 3.593097686767578, |
|
"eval_runtime": 40.8353, |
|
"eval_samples_per_second": 979.546, |
|
"eval_steps_per_second": 61.222, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"learning_rate": 2.214e-07, |
|
"loss": 3.7466, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 23.24, |
|
"eval_loss": 3.561359405517578, |
|
"eval_runtime": 40.8333, |
|
"eval_samples_per_second": 979.593, |
|
"eval_steps_per_second": 61.225, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"eval_loss": 3.54563045501709, |
|
"eval_runtime": 40.8237, |
|
"eval_samples_per_second": 979.822, |
|
"eval_steps_per_second": 61.239, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 3.7503, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 3.5894973278045654, |
|
"eval_runtime": 40.774, |
|
"eval_samples_per_second": 981.017, |
|
"eval_steps_per_second": 61.314, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"eval_loss": 3.560800075531006, |
|
"eval_runtime": 40.772, |
|
"eval_samples_per_second": 981.064, |
|
"eval_steps_per_second": 61.317, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 3.7484, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"eval_loss": 3.569559335708618, |
|
"eval_runtime": 40.8573, |
|
"eval_samples_per_second": 979.016, |
|
"eval_steps_per_second": 61.189, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 24.08, |
|
"eval_loss": 3.5653023719787598, |
|
"eval_runtime": 40.8073, |
|
"eval_samples_per_second": 980.216, |
|
"eval_steps_per_second": 61.264, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"learning_rate": 2.132e-07, |
|
"loss": 3.7435, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 24.25, |
|
"eval_loss": 3.5721492767333984, |
|
"eval_runtime": 41.2214, |
|
"eval_samples_per_second": 970.369, |
|
"eval_steps_per_second": 60.648, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"eval_loss": 3.5510270595550537, |
|
"eval_runtime": 40.8174, |
|
"eval_samples_per_second": 979.974, |
|
"eval_steps_per_second": 61.248, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 3.7348, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 3.5631327629089355, |
|
"eval_runtime": 40.9539, |
|
"eval_samples_per_second": 976.708, |
|
"eval_steps_per_second": 61.044, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"eval_loss": 3.5727241039276123, |
|
"eval_runtime": 40.9289, |
|
"eval_samples_per_second": 977.305, |
|
"eval_steps_per_second": 61.082, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 3.7341, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"eval_loss": 3.5835182666778564, |
|
"eval_runtime": 40.8074, |
|
"eval_samples_per_second": 980.214, |
|
"eval_steps_per_second": 61.263, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"eval_loss": 3.5766148567199707, |
|
"eval_runtime": 40.8582, |
|
"eval_samples_per_second": 978.995, |
|
"eval_steps_per_second": 61.187, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"learning_rate": 2.05e-07, |
|
"loss": 3.7435, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"eval_loss": 3.560559034347534, |
|
"eval_runtime": 40.757, |
|
"eval_samples_per_second": 981.427, |
|
"eval_steps_per_second": 61.339, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 25.43, |
|
"eval_loss": 3.5497019290924072, |
|
"eval_runtime": 40.8866, |
|
"eval_samples_per_second": 978.316, |
|
"eval_steps_per_second": 61.145, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 3.732, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_loss": 3.543264150619507, |
|
"eval_runtime": 41.019, |
|
"eval_samples_per_second": 975.158, |
|
"eval_steps_per_second": 60.947, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"eval_loss": 3.5419702529907227, |
|
"eval_runtime": 40.8947, |
|
"eval_samples_per_second": 978.121, |
|
"eval_steps_per_second": 61.133, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 3.7343, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_loss": 3.598745346069336, |
|
"eval_runtime": 40.7705, |
|
"eval_samples_per_second": 981.101, |
|
"eval_steps_per_second": 61.319, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"eval_loss": 3.5955729484558105, |
|
"eval_runtime": 40.7487, |
|
"eval_samples_per_second": 981.626, |
|
"eval_steps_per_second": 61.352, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 1.968e-07, |
|
"loss": 3.7336, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"eval_loss": 3.56730580329895, |
|
"eval_runtime": 41.2363, |
|
"eval_samples_per_second": 970.019, |
|
"eval_steps_per_second": 60.626, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"eval_loss": 3.564300775527954, |
|
"eval_runtime": 40.7963, |
|
"eval_samples_per_second": 980.481, |
|
"eval_steps_per_second": 61.28, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 3.7444, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"eval_loss": 3.5847718715667725, |
|
"eval_runtime": 40.905, |
|
"eval_samples_per_second": 977.875, |
|
"eval_steps_per_second": 61.117, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"eval_loss": 3.5693116188049316, |
|
"eval_runtime": 40.875, |
|
"eval_samples_per_second": 978.593, |
|
"eval_steps_per_second": 61.162, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 3.7395, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_loss": 3.574453830718994, |
|
"eval_runtime": 40.829, |
|
"eval_samples_per_second": 979.695, |
|
"eval_steps_per_second": 61.231, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"eval_loss": 3.5757510662078857, |
|
"eval_runtime": 41.0564, |
|
"eval_samples_per_second": 974.269, |
|
"eval_steps_per_second": 60.892, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"learning_rate": 1.886e-07, |
|
"loss": 3.7389, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 27.28, |
|
"eval_loss": 3.568530559539795, |
|
"eval_runtime": 40.9717, |
|
"eval_samples_per_second": 976.284, |
|
"eval_steps_per_second": 61.018, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"eval_loss": 3.5712063312530518, |
|
"eval_runtime": 41.0295, |
|
"eval_samples_per_second": 974.909, |
|
"eval_steps_per_second": 60.932, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 3.7416, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"eval_loss": 3.5692648887634277, |
|
"eval_runtime": 41.0883, |
|
"eval_samples_per_second": 973.513, |
|
"eval_steps_per_second": 60.845, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"eval_loss": 3.574037790298462, |
|
"eval_runtime": 41.0301, |
|
"eval_samples_per_second": 974.894, |
|
"eval_steps_per_second": 60.931, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 3.7305, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"eval_loss": 3.580286741256714, |
|
"eval_runtime": 41.1151, |
|
"eval_samples_per_second": 972.878, |
|
"eval_steps_per_second": 60.805, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 28.13, |
|
"eval_loss": 3.5682218074798584, |
|
"eval_runtime": 41.0432, |
|
"eval_samples_per_second": 974.583, |
|
"eval_steps_per_second": 60.911, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"learning_rate": 1.804e-07, |
|
"loss": 3.7268, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"eval_loss": 3.592750310897827, |
|
"eval_runtime": 41.2516, |
|
"eval_samples_per_second": 969.659, |
|
"eval_steps_per_second": 60.604, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"eval_loss": 3.560762405395508, |
|
"eval_runtime": 41.1073, |
|
"eval_samples_per_second": 973.062, |
|
"eval_steps_per_second": 60.816, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 3.7363, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.63, |
|
"eval_loss": 3.5586514472961426, |
|
"eval_runtime": 41.0403, |
|
"eval_samples_per_second": 974.651, |
|
"eval_steps_per_second": 60.916, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_loss": 3.5602593421936035, |
|
"eval_runtime": 41.0821, |
|
"eval_samples_per_second": 973.659, |
|
"eval_steps_per_second": 60.854, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 3.7325, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"eval_loss": 3.5710933208465576, |
|
"eval_runtime": 40.9996, |
|
"eval_samples_per_second": 975.619, |
|
"eval_steps_per_second": 60.976, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"eval_loss": 3.5828020572662354, |
|
"eval_runtime": 41.0029, |
|
"eval_samples_per_second": 975.54, |
|
"eval_steps_per_second": 60.971, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"learning_rate": 1.722e-07, |
|
"loss": 3.7337, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"eval_loss": 3.5789852142333984, |
|
"eval_runtime": 40.9538, |
|
"eval_samples_per_second": 976.711, |
|
"eval_steps_per_second": 61.044, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"eval_loss": 3.5794663429260254, |
|
"eval_runtime": 41.0721, |
|
"eval_samples_per_second": 973.898, |
|
"eval_steps_per_second": 60.869, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 29.64, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 3.7367, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.64, |
|
"eval_loss": 3.552762031555176, |
|
"eval_runtime": 41.0733, |
|
"eval_samples_per_second": 973.869, |
|
"eval_steps_per_second": 60.867, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 29.81, |
|
"eval_loss": 3.5766358375549316, |
|
"eval_runtime": 41.1321, |
|
"eval_samples_per_second": 972.476, |
|
"eval_steps_per_second": 60.78, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 3.7313, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"eval_loss": 3.56103777885437, |
|
"eval_runtime": 41.1652, |
|
"eval_samples_per_second": 971.695, |
|
"eval_steps_per_second": 60.731, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"eval_loss": 3.5833914279937744, |
|
"eval_runtime": 41.1146, |
|
"eval_samples_per_second": 972.891, |
|
"eval_steps_per_second": 60.806, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"learning_rate": 1.64e-07, |
|
"loss": 3.7277, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 30.32, |
|
"eval_loss": 3.554572582244873, |
|
"eval_runtime": 41.1393, |
|
"eval_samples_per_second": 972.307, |
|
"eval_steps_per_second": 60.769, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"eval_loss": 3.5534067153930664, |
|
"eval_runtime": 41.4231, |
|
"eval_samples_per_second": 965.644, |
|
"eval_steps_per_second": 60.353, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 3.7296, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 30.65, |
|
"eval_loss": 3.564561605453491, |
|
"eval_runtime": 41.4067, |
|
"eval_samples_per_second": 966.026, |
|
"eval_steps_per_second": 60.377, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"eval_loss": 3.543602705001831, |
|
"eval_runtime": 41.1326, |
|
"eval_samples_per_second": 972.466, |
|
"eval_steps_per_second": 60.779, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 3.7411, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_loss": 3.5777840614318848, |
|
"eval_runtime": 41.3406, |
|
"eval_samples_per_second": 967.572, |
|
"eval_steps_per_second": 60.473, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 31.16, |
|
"eval_loss": 3.554126262664795, |
|
"eval_runtime": 41.1916, |
|
"eval_samples_per_second": 971.072, |
|
"eval_steps_per_second": 60.692, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 1.558e-07, |
|
"loss": 3.7233, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"eval_loss": 3.5719780921936035, |
|
"eval_runtime": 41.1623, |
|
"eval_samples_per_second": 971.763, |
|
"eval_steps_per_second": 60.735, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 31.49, |
|
"eval_loss": 3.5567288398742676, |
|
"eval_runtime": 41.1991, |
|
"eval_samples_per_second": 970.896, |
|
"eval_steps_per_second": 60.681, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 3.7291, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"eval_loss": 3.547672748565674, |
|
"eval_runtime": 41.202, |
|
"eval_samples_per_second": 970.826, |
|
"eval_steps_per_second": 60.677, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"eval_loss": 3.5557453632354736, |
|
"eval_runtime": 41.3088, |
|
"eval_samples_per_second": 968.316, |
|
"eval_steps_per_second": 60.52, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 3.7265, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 3.564314842224121, |
|
"eval_runtime": 41.189, |
|
"eval_samples_per_second": 971.134, |
|
"eval_steps_per_second": 60.696, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 32.17, |
|
"eval_loss": 3.5738565921783447, |
|
"eval_runtime": 41.1759, |
|
"eval_samples_per_second": 971.443, |
|
"eval_steps_per_second": 60.715, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 1.476e-07, |
|
"loss": 3.7352, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"eval_loss": 3.562810182571411, |
|
"eval_runtime": 41.1629, |
|
"eval_samples_per_second": 971.75, |
|
"eval_steps_per_second": 60.734, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"eval_loss": 3.5542376041412354, |
|
"eval_runtime": 41.3509, |
|
"eval_samples_per_second": 967.331, |
|
"eval_steps_per_second": 60.458, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 3.7353, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 32.67, |
|
"eval_loss": 3.549649715423584, |
|
"eval_runtime": 41.29, |
|
"eval_samples_per_second": 968.757, |
|
"eval_steps_per_second": 60.547, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"eval_loss": 3.5736968517303467, |
|
"eval_runtime": 41.3467, |
|
"eval_samples_per_second": 967.429, |
|
"eval_steps_per_second": 60.464, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 3.7243, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"eval_loss": 3.578824520111084, |
|
"eval_runtime": 41.2986, |
|
"eval_samples_per_second": 968.556, |
|
"eval_steps_per_second": 60.535, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"eval_loss": 3.5630581378936768, |
|
"eval_runtime": 41.302, |
|
"eval_samples_per_second": 968.476, |
|
"eval_steps_per_second": 60.53, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 3.7192, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"eval_loss": 3.543769598007202, |
|
"eval_runtime": 41.2526, |
|
"eval_samples_per_second": 969.637, |
|
"eval_steps_per_second": 60.602, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 33.52, |
|
"eval_loss": 3.5554468631744385, |
|
"eval_runtime": 41.4086, |
|
"eval_samples_per_second": 965.983, |
|
"eval_steps_per_second": 60.374, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 3.7266, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"eval_loss": 3.574753999710083, |
|
"eval_runtime": 41.1899, |
|
"eval_samples_per_second": 971.113, |
|
"eval_steps_per_second": 60.695, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_loss": 3.5620429515838623, |
|
"eval_runtime": 41.192, |
|
"eval_samples_per_second": 971.061, |
|
"eval_steps_per_second": 60.691, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 3.73, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"eval_loss": 3.5463855266571045, |
|
"eval_runtime": 41.1647, |
|
"eval_samples_per_second": 971.707, |
|
"eval_steps_per_second": 60.732, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 34.19, |
|
"eval_loss": 3.56703782081604, |
|
"eval_runtime": 41.345, |
|
"eval_samples_per_second": 967.469, |
|
"eval_steps_per_second": 60.467, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"learning_rate": 1.312e-07, |
|
"loss": 3.7264, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"eval_loss": 3.562556743621826, |
|
"eval_runtime": 41.1519, |
|
"eval_samples_per_second": 972.009, |
|
"eval_steps_per_second": 60.751, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 34.53, |
|
"eval_loss": 3.5640175342559814, |
|
"eval_runtime": 41.225, |
|
"eval_samples_per_second": 970.286, |
|
"eval_steps_per_second": 60.643, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 3.7317, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"eval_loss": 3.565020799636841, |
|
"eval_runtime": 41.2445, |
|
"eval_samples_per_second": 969.827, |
|
"eval_steps_per_second": 60.614, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 34.86, |
|
"eval_loss": 3.545823335647583, |
|
"eval_runtime": 41.181, |
|
"eval_samples_per_second": 971.323, |
|
"eval_steps_per_second": 60.708, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 3.7332, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 35.03, |
|
"eval_loss": 3.556736707687378, |
|
"eval_runtime": 41.2053, |
|
"eval_samples_per_second": 970.749, |
|
"eval_steps_per_second": 60.672, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"eval_loss": 3.561016321182251, |
|
"eval_runtime": 41.1105, |
|
"eval_samples_per_second": 972.987, |
|
"eval_steps_per_second": 60.812, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"learning_rate": 1.23e-07, |
|
"loss": 3.7248, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"eval_loss": 3.565033197402954, |
|
"eval_runtime": 41.1314, |
|
"eval_samples_per_second": 972.494, |
|
"eval_steps_per_second": 60.781, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"eval_loss": 3.5579514503479004, |
|
"eval_runtime": 41.2359, |
|
"eval_samples_per_second": 970.029, |
|
"eval_steps_per_second": 60.627, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 3.7232, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"eval_loss": 3.5829317569732666, |
|
"eval_runtime": 41.5316, |
|
"eval_samples_per_second": 963.123, |
|
"eval_steps_per_second": 60.195, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 35.87, |
|
"eval_loss": 3.553234338760376, |
|
"eval_runtime": 41.2091, |
|
"eval_samples_per_second": 970.659, |
|
"eval_steps_per_second": 60.666, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 3.729, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"eval_loss": 3.5723159313201904, |
|
"eval_runtime": 41.2473, |
|
"eval_samples_per_second": 969.76, |
|
"eval_steps_per_second": 60.61, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"eval_loss": 3.5453789234161377, |
|
"eval_runtime": 41.2829, |
|
"eval_samples_per_second": 968.923, |
|
"eval_steps_per_second": 60.558, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 3.7273, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"eval_loss": 3.5622527599334717, |
|
"eval_runtime": 41.2679, |
|
"eval_samples_per_second": 969.276, |
|
"eval_steps_per_second": 60.58, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 3.5461573600769043, |
|
"eval_runtime": 41.2796, |
|
"eval_samples_per_second": 969.001, |
|
"eval_steps_per_second": 60.563, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 3.7261, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"eval_loss": 3.574284315109253, |
|
"eval_runtime": 41.1482, |
|
"eval_samples_per_second": 972.096, |
|
"eval_steps_per_second": 60.756, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"eval_loss": 3.5637691020965576, |
|
"eval_runtime": 41.2008, |
|
"eval_samples_per_second": 970.856, |
|
"eval_steps_per_second": 60.678, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 3.7208, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"eval_loss": 3.55189847946167, |
|
"eval_runtime": 41.2562, |
|
"eval_samples_per_second": 969.552, |
|
"eval_steps_per_second": 60.597, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 37.22, |
|
"eval_loss": 3.558392286300659, |
|
"eval_runtime": 41.4546, |
|
"eval_samples_per_second": 964.911, |
|
"eval_steps_per_second": 60.307, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 1.066e-07, |
|
"loss": 3.7183, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"eval_loss": 3.5308432579040527, |
|
"eval_runtime": 41.3019, |
|
"eval_samples_per_second": 968.479, |
|
"eval_steps_per_second": 60.53, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 37.56, |
|
"eval_loss": 3.554945230484009, |
|
"eval_runtime": 41.2828, |
|
"eval_samples_per_second": 968.925, |
|
"eval_steps_per_second": 60.558, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 3.7193, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"eval_loss": 3.540940999984741, |
|
"eval_runtime": 41.3223, |
|
"eval_samples_per_second": 968.0, |
|
"eval_steps_per_second": 60.5, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"eval_loss": 3.5396194458007812, |
|
"eval_runtime": 41.4779, |
|
"eval_samples_per_second": 964.368, |
|
"eval_steps_per_second": 60.273, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 3.7271, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"eval_loss": 3.553603172302246, |
|
"eval_runtime": 41.2755, |
|
"eval_samples_per_second": 969.099, |
|
"eval_steps_per_second": 60.569, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"eval_loss": 3.5452282428741455, |
|
"eval_runtime": 41.2774, |
|
"eval_samples_per_second": 969.053, |
|
"eval_steps_per_second": 60.566, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 9.84e-08, |
|
"loss": 3.7284, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_loss": 3.558215618133545, |
|
"eval_runtime": 41.3703, |
|
"eval_samples_per_second": 966.878, |
|
"eval_steps_per_second": 60.43, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 38.57, |
|
"eval_loss": 3.5667941570281982, |
|
"eval_runtime": 41.3196, |
|
"eval_samples_per_second": 968.064, |
|
"eval_steps_per_second": 60.504, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 3.714, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"eval_loss": 3.567256212234497, |
|
"eval_runtime": 41.4289, |
|
"eval_samples_per_second": 965.51, |
|
"eval_steps_per_second": 60.344, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 38.9, |
|
"eval_loss": 3.5477054119110107, |
|
"eval_runtime": 41.2882, |
|
"eval_samples_per_second": 968.799, |
|
"eval_steps_per_second": 60.55, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 3.7105, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 39.07, |
|
"eval_loss": 3.5661816596984863, |
|
"eval_runtime": 41.3711, |
|
"eval_samples_per_second": 966.858, |
|
"eval_steps_per_second": 60.429, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 39.24, |
|
"eval_loss": 3.5498225688934326, |
|
"eval_runtime": 41.2668, |
|
"eval_samples_per_second": 969.303, |
|
"eval_steps_per_second": 60.581, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"learning_rate": 9.02e-08, |
|
"loss": 3.7189, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"eval_loss": 3.549349546432495, |
|
"eval_runtime": 41.3612, |
|
"eval_samples_per_second": 967.091, |
|
"eval_steps_per_second": 60.443, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 39.58, |
|
"eval_loss": 3.5676159858703613, |
|
"eval_runtime": 41.3658, |
|
"eval_samples_per_second": 966.982, |
|
"eval_steps_per_second": 60.436, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 3.7203, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"eval_loss": 3.5640437602996826, |
|
"eval_runtime": 41.3191, |
|
"eval_samples_per_second": 968.076, |
|
"eval_steps_per_second": 60.505, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 39.91, |
|
"eval_loss": 3.574657440185547, |
|
"eval_runtime": 41.2758, |
|
"eval_samples_per_second": 969.09, |
|
"eval_steps_per_second": 60.568, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 3.7271, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 40.08, |
|
"eval_loss": 3.559176445007324, |
|
"eval_runtime": 41.2682, |
|
"eval_samples_per_second": 969.268, |
|
"eval_steps_per_second": 60.579, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 40.25, |
|
"eval_loss": 3.5515100955963135, |
|
"eval_runtime": 41.3407, |
|
"eval_samples_per_second": 967.57, |
|
"eval_steps_per_second": 60.473, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"learning_rate": 8.2e-08, |
|
"loss": 3.7237, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"eval_loss": 3.5703725814819336, |
|
"eval_runtime": 41.32, |
|
"eval_samples_per_second": 968.053, |
|
"eval_steps_per_second": 60.503, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 40.59, |
|
"eval_loss": 3.56421160697937, |
|
"eval_runtime": 41.2747, |
|
"eval_samples_per_second": 969.118, |
|
"eval_steps_per_second": 60.57, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 3.723, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"eval_loss": 3.529963970184326, |
|
"eval_runtime": 41.312, |
|
"eval_samples_per_second": 968.241, |
|
"eval_steps_per_second": 60.515, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 40.93, |
|
"eval_loss": 3.548246383666992, |
|
"eval_runtime": 41.3925, |
|
"eval_samples_per_second": 966.359, |
|
"eval_steps_per_second": 60.397, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 3.7224, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 41.09, |
|
"eval_loss": 3.558602809906006, |
|
"eval_runtime": 41.3607, |
|
"eval_samples_per_second": 967.102, |
|
"eval_steps_per_second": 60.444, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 41.26, |
|
"eval_loss": 3.5462992191314697, |
|
"eval_runtime": 41.3586, |
|
"eval_samples_per_second": 967.15, |
|
"eval_steps_per_second": 60.447, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"learning_rate": 7.38e-08, |
|
"loss": 3.715, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 41.43, |
|
"eval_loss": 3.5323476791381836, |
|
"eval_runtime": 41.2806, |
|
"eval_samples_per_second": 968.977, |
|
"eval_steps_per_second": 60.561, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_loss": 3.5426485538482666, |
|
"eval_runtime": 41.7376, |
|
"eval_samples_per_second": 958.367, |
|
"eval_steps_per_second": 59.898, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 3.7209, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 41.77, |
|
"eval_loss": 3.551342487335205, |
|
"eval_runtime": 41.7643, |
|
"eval_samples_per_second": 957.756, |
|
"eval_steps_per_second": 59.86, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 41.94, |
|
"eval_loss": 3.561406135559082, |
|
"eval_runtime": 41.4726, |
|
"eval_samples_per_second": 964.492, |
|
"eval_steps_per_second": 60.281, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 42.1, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 3.7183, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 42.1, |
|
"eval_loss": 3.567796468734741, |
|
"eval_runtime": 41.269, |
|
"eval_samples_per_second": 969.25, |
|
"eval_steps_per_second": 60.578, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 42.27, |
|
"eval_loss": 3.5304062366485596, |
|
"eval_runtime": 41.3875, |
|
"eval_samples_per_second": 966.474, |
|
"eval_steps_per_second": 60.405, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"learning_rate": 6.56e-08, |
|
"loss": 3.7161, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 42.44, |
|
"eval_loss": 3.5631351470947266, |
|
"eval_runtime": 41.3512, |
|
"eval_samples_per_second": 967.324, |
|
"eval_steps_per_second": 60.458, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"eval_loss": 3.5589487552642822, |
|
"eval_runtime": 41.3841, |
|
"eval_samples_per_second": 966.556, |
|
"eval_steps_per_second": 60.41, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 3.7215, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 42.78, |
|
"eval_loss": 3.5639231204986572, |
|
"eval_runtime": 41.4018, |
|
"eval_samples_per_second": 966.143, |
|
"eval_steps_per_second": 60.384, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"eval_loss": 3.5375659465789795, |
|
"eval_runtime": 41.3, |
|
"eval_samples_per_second": 968.522, |
|
"eval_steps_per_second": 60.533, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 3.7205, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"eval_loss": 3.5478363037109375, |
|
"eval_runtime": 41.2814, |
|
"eval_samples_per_second": 968.959, |
|
"eval_steps_per_second": 60.56, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 43.28, |
|
"eval_loss": 3.551063299179077, |
|
"eval_runtime": 41.3483, |
|
"eval_samples_per_second": 967.391, |
|
"eval_steps_per_second": 60.462, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 3.7178, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"eval_loss": 3.5284996032714844, |
|
"eval_runtime": 41.4216, |
|
"eval_samples_per_second": 965.679, |
|
"eval_steps_per_second": 60.355, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 43.62, |
|
"eval_loss": 3.5428383350372314, |
|
"eval_runtime": 41.4407, |
|
"eval_samples_per_second": 965.235, |
|
"eval_steps_per_second": 60.327, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 3.7232, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 43.79, |
|
"eval_loss": 3.5347280502319336, |
|
"eval_runtime": 41.6164, |
|
"eval_samples_per_second": 961.159, |
|
"eval_steps_per_second": 60.072, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 43.96, |
|
"eval_loss": 3.5500776767730713, |
|
"eval_runtime": 41.2368, |
|
"eval_samples_per_second": 970.006, |
|
"eval_steps_per_second": 60.625, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 3.7167, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 44.13, |
|
"eval_loss": 3.5421836376190186, |
|
"eval_runtime": 41.7632, |
|
"eval_samples_per_second": 957.781, |
|
"eval_steps_per_second": 59.861, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 44.29, |
|
"eval_loss": 3.548715591430664, |
|
"eval_runtime": 42.4633, |
|
"eval_samples_per_second": 941.99, |
|
"eval_steps_per_second": 58.874, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 44.46, |
|
"learning_rate": 4.92e-08, |
|
"loss": 3.7253, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 44.46, |
|
"eval_loss": 3.5540173053741455, |
|
"eval_runtime": 41.2807, |
|
"eval_samples_per_second": 968.975, |
|
"eval_steps_per_second": 60.561, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 44.63, |
|
"eval_loss": 3.543179988861084, |
|
"eval_runtime": 41.2704, |
|
"eval_samples_per_second": 969.217, |
|
"eval_steps_per_second": 60.576, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 3.7139, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_loss": 3.550206184387207, |
|
"eval_runtime": 41.9899, |
|
"eval_samples_per_second": 952.61, |
|
"eval_steps_per_second": 59.538, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 44.97, |
|
"eval_loss": 3.5449559688568115, |
|
"eval_runtime": 41.4028, |
|
"eval_samples_per_second": 966.118, |
|
"eval_steps_per_second": 60.382, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 45.14, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 3.7194, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 45.14, |
|
"eval_loss": 3.5563695430755615, |
|
"eval_runtime": 41.3519, |
|
"eval_samples_per_second": 967.307, |
|
"eval_steps_per_second": 60.457, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 45.3, |
|
"eval_loss": 3.544080972671509, |
|
"eval_runtime": 41.479, |
|
"eval_samples_per_second": 964.344, |
|
"eval_steps_per_second": 60.271, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 45.47, |
|
"learning_rate": 4.1e-08, |
|
"loss": 3.7167, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 45.47, |
|
"eval_loss": 3.5549235343933105, |
|
"eval_runtime": 41.7585, |
|
"eval_samples_per_second": 957.888, |
|
"eval_steps_per_second": 59.868, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 45.64, |
|
"eval_loss": 3.5428645610809326, |
|
"eval_runtime": 41.4538, |
|
"eval_samples_per_second": 964.93, |
|
"eval_steps_per_second": 60.308, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 45.81, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 3.7202, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 45.81, |
|
"eval_loss": 3.5612573623657227, |
|
"eval_runtime": 41.585, |
|
"eval_samples_per_second": 961.885, |
|
"eval_steps_per_second": 60.118, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 45.98, |
|
"eval_loss": 3.546862840652466, |
|
"eval_runtime": 41.4173, |
|
"eval_samples_per_second": 965.779, |
|
"eval_steps_per_second": 60.361, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 3.7193, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"eval_loss": 3.5467140674591064, |
|
"eval_runtime": 41.5286, |
|
"eval_samples_per_second": 963.191, |
|
"eval_steps_per_second": 60.199, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 46.31, |
|
"eval_loss": 3.5492827892303467, |
|
"eval_runtime": 41.8931, |
|
"eval_samples_per_second": 954.812, |
|
"eval_steps_per_second": 59.676, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 3.28e-08, |
|
"loss": 3.717, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"eval_loss": 3.5651960372924805, |
|
"eval_runtime": 41.4355, |
|
"eval_samples_per_second": 965.356, |
|
"eval_steps_per_second": 60.335, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 46.65, |
|
"eval_loss": 3.566892623901367, |
|
"eval_runtime": 41.4881, |
|
"eval_samples_per_second": 964.133, |
|
"eval_steps_per_second": 60.258, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 3.7164, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"eval_loss": 3.575472593307495, |
|
"eval_runtime": 41.4007, |
|
"eval_samples_per_second": 966.167, |
|
"eval_steps_per_second": 60.385, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_loss": 3.5580363273620605, |
|
"eval_runtime": 41.4412, |
|
"eval_samples_per_second": 965.223, |
|
"eval_steps_per_second": 60.326, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 47.16, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 3.715, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 47.16, |
|
"eval_loss": 3.5402655601501465, |
|
"eval_runtime": 41.4287, |
|
"eval_samples_per_second": 965.513, |
|
"eval_steps_per_second": 60.345, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"eval_loss": 3.5521085262298584, |
|
"eval_runtime": 41.929, |
|
"eval_samples_per_second": 953.993, |
|
"eval_steps_per_second": 59.625, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 47.49, |
|
"learning_rate": 2.46e-08, |
|
"loss": 3.7091, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 47.49, |
|
"eval_loss": 3.5604448318481445, |
|
"eval_runtime": 41.9128, |
|
"eval_samples_per_second": 954.362, |
|
"eval_steps_per_second": 59.648, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 47.66, |
|
"eval_loss": 3.5401012897491455, |
|
"eval_runtime": 41.5487, |
|
"eval_samples_per_second": 962.726, |
|
"eval_steps_per_second": 60.17, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 3.7199, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"eval_loss": 3.5407750606536865, |
|
"eval_runtime": 41.6011, |
|
"eval_samples_per_second": 961.512, |
|
"eval_steps_per_second": 60.094, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 3.5508713722229004, |
|
"eval_runtime": 41.6348, |
|
"eval_samples_per_second": 960.736, |
|
"eval_steps_per_second": 60.046, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 3.7238, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 48.17, |
|
"eval_loss": 3.53483510017395, |
|
"eval_runtime": 41.5381, |
|
"eval_samples_per_second": 962.972, |
|
"eval_steps_per_second": 60.186, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 48.34, |
|
"eval_loss": 3.5529632568359375, |
|
"eval_runtime": 41.6886, |
|
"eval_samples_per_second": 959.496, |
|
"eval_steps_per_second": 59.968, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"learning_rate": 1.64e-08, |
|
"loss": 3.7193, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"eval_loss": 3.544734001159668, |
|
"eval_runtime": 41.7228, |
|
"eval_samples_per_second": 958.709, |
|
"eval_steps_per_second": 59.919, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"eval_loss": 3.545278549194336, |
|
"eval_runtime": 41.4067, |
|
"eval_samples_per_second": 966.027, |
|
"eval_steps_per_second": 60.377, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 3.7195, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 48.84, |
|
"eval_loss": 3.5487241744995117, |
|
"eval_runtime": 42.1788, |
|
"eval_samples_per_second": 948.345, |
|
"eval_steps_per_second": 59.272, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"eval_loss": 3.5356762409210205, |
|
"eval_runtime": 41.475, |
|
"eval_samples_per_second": 964.436, |
|
"eval_steps_per_second": 60.277, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 3.7187, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"eval_loss": 3.540393352508545, |
|
"eval_runtime": 41.5311, |
|
"eval_samples_per_second": 963.133, |
|
"eval_steps_per_second": 60.196, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 49.35, |
|
"eval_loss": 3.524733304977417, |
|
"eval_runtime": 41.455, |
|
"eval_samples_per_second": 964.901, |
|
"eval_steps_per_second": 60.306, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 49.51, |
|
"learning_rate": 8.2e-09, |
|
"loss": 3.7157, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 49.51, |
|
"eval_loss": 3.5556745529174805, |
|
"eval_runtime": 41.5677, |
|
"eval_samples_per_second": 962.286, |
|
"eval_steps_per_second": 60.143, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 49.68, |
|
"eval_loss": 3.553208112716675, |
|
"eval_runtime": 41.4048, |
|
"eval_samples_per_second": 966.072, |
|
"eval_steps_per_second": 60.38, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 3.7144, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"eval_loss": 3.5453133583068848, |
|
"eval_runtime": 41.6628, |
|
"eval_samples_per_second": 960.089, |
|
"eval_steps_per_second": 60.006, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"eval_loss": 3.5421085357666016, |
|
"eval_runtime": 41.4612, |
|
"eval_samples_per_second": 964.757, |
|
"eval_steps_per_second": 60.297, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 50.19, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 3.715, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 50.19, |
|
"eval_loss": 3.518317461013794, |
|
"eval_runtime": 41.4914, |
|
"eval_samples_per_second": 964.054, |
|
"eval_steps_per_second": 60.253, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 50.36, |
|
"eval_loss": 3.5473098754882812, |
|
"eval_runtime": 41.4726, |
|
"eval_samples_per_second": 964.491, |
|
"eval_steps_per_second": 60.281, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"learning_rate": 0.0, |
|
"loss": 3.7208, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"eval_loss": 3.5385937690734863, |
|
"eval_runtime": 41.4411, |
|
"eval_samples_per_second": 965.224, |
|
"eval_steps_per_second": 60.327, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"step": 2400000, |
|
"total_flos": 7.752989891649069e+17, |
|
"train_loss": 3.7635687060546874, |
|
"train_runtime": 151726.3184, |
|
"train_samples_per_second": 253.087, |
|
"train_steps_per_second": 15.818 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 51, |
|
"save_steps": 32000, |
|
"total_flos": 7.752989891649069e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|