{ "best_metric": 3.5347280502319336, "best_model_checkpoint": "./model_tweets_2020_Q2_90/checkpoint-2080000", "epoch": 50.52525209995579, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_loss": 4.064033031463623, "eval_runtime": 40.7192, "eval_samples_per_second": 982.337, "eval_steps_per_second": 61.396, "step": 8000 }, { "epoch": 0.34, "learning_rate": 4.0726666666666665e-07, "loss": 4.2654, "step": 16000 }, { "epoch": 0.34, "eval_loss": 3.941408634185791, "eval_runtime": 40.7754, "eval_samples_per_second": 980.983, "eval_steps_per_second": 61.311, "step": 16000 }, { "epoch": 0.51, "eval_loss": 3.8956282138824463, "eval_runtime": 40.7547, "eval_samples_per_second": 981.482, "eval_steps_per_second": 61.343, "step": 24000 }, { "epoch": 0.67, "learning_rate": 4.0453333333333336e-07, "loss": 4.0459, "step": 32000 }, { "epoch": 0.67, "eval_loss": 3.8526694774627686, "eval_runtime": 40.8002, "eval_samples_per_second": 980.388, "eval_steps_per_second": 61.274, "step": 32000 }, { "epoch": 0.84, "eval_loss": 3.8232262134552, "eval_runtime": 40.9229, "eval_samples_per_second": 977.449, "eval_steps_per_second": 61.091, "step": 40000 }, { "epoch": 1.01, "learning_rate": 4.018e-07, "loss": 3.9781, "step": 48000 }, { "epoch": 1.01, "eval_loss": 3.7806332111358643, "eval_runtime": 40.7016, "eval_samples_per_second": 982.762, "eval_steps_per_second": 61.423, "step": 48000 }, { "epoch": 1.18, "eval_loss": 3.7860567569732666, "eval_runtime": 40.8284, "eval_samples_per_second": 979.71, "eval_steps_per_second": 61.232, "step": 56000 }, { "epoch": 1.35, "learning_rate": 3.9906666666666667e-07, "loss": 3.9323, "step": 64000 }, { "epoch": 1.35, "eval_loss": 3.79296875, "eval_runtime": 40.6905, "eval_samples_per_second": 983.03, "eval_steps_per_second": 61.439, "step": 64000 }, { "epoch": 1.52, "eval_loss": 3.781362533569336, "eval_runtime": 40.7808, "eval_samples_per_second": 980.855, "eval_steps_per_second": 61.303, "step": 72000 }, { "epoch": 1.68, "learning_rate": 3.963333333333333e-07, "loss": 3.9224, "step": 80000 }, { "epoch": 1.68, "eval_loss": 3.781531572341919, "eval_runtime": 40.7108, "eval_samples_per_second": 982.541, "eval_steps_per_second": 61.409, "step": 80000 }, { "epoch": 1.85, "eval_loss": 3.7402968406677246, "eval_runtime": 40.9233, "eval_samples_per_second": 977.439, "eval_steps_per_second": 61.09, "step": 88000 }, { "epoch": 2.02, "learning_rate": 3.936e-07, "loss": 3.8924, "step": 96000 }, { "epoch": 2.02, "eval_loss": 3.7468085289001465, "eval_runtime": 40.9264, "eval_samples_per_second": 977.363, "eval_steps_per_second": 61.085, "step": 96000 }, { "epoch": 2.19, "eval_loss": 3.740011215209961, "eval_runtime": 40.805, "eval_samples_per_second": 980.273, "eval_steps_per_second": 61.267, "step": 104000 }, { "epoch": 2.36, "learning_rate": 3.908666666666667e-07, "loss": 3.879, "step": 112000 }, { "epoch": 2.36, "eval_loss": 3.7283473014831543, "eval_runtime": 41.0237, "eval_samples_per_second": 975.046, "eval_steps_per_second": 60.94, "step": 112000 }, { "epoch": 2.53, "eval_loss": 3.738088369369507, "eval_runtime": 40.8332, "eval_samples_per_second": 979.596, "eval_steps_per_second": 61.225, "step": 120000 }, { "epoch": 2.69, "learning_rate": 3.8813333333333334e-07, "loss": 3.8806, "step": 128000 }, { "epoch": 2.69, "eval_loss": 3.7072582244873047, "eval_runtime": 40.8641, "eval_samples_per_second": 978.853, "eval_steps_per_second": 61.178, "step": 128000 }, { "epoch": 2.86, "eval_loss": 3.7082958221435547, "eval_runtime": 40.6946, "eval_samples_per_second": 982.931, "eval_steps_per_second": 61.433, "step": 136000 }, { "epoch": 3.03, "learning_rate": 3.854e-07, "loss": 3.8659, "step": 144000 }, { "epoch": 3.03, "eval_loss": 3.69919490814209, "eval_runtime": 40.8002, "eval_samples_per_second": 980.387, "eval_steps_per_second": 61.274, "step": 144000 }, { "epoch": 3.2, "eval_loss": 3.695563316345215, "eval_runtime": 40.8922, "eval_samples_per_second": 978.182, "eval_steps_per_second": 61.136, "step": 152000 }, { "epoch": 3.37, "learning_rate": 3.8266666666666665e-07, "loss": 3.8634, "step": 160000 }, { "epoch": 3.37, "eval_loss": 3.674525022506714, "eval_runtime": 40.9185, "eval_samples_per_second": 977.552, "eval_steps_per_second": 61.097, "step": 160000 }, { "epoch": 3.54, "eval_loss": 3.70168399810791, "eval_runtime": 40.7392, "eval_samples_per_second": 981.855, "eval_steps_per_second": 61.366, "step": 168000 }, { "epoch": 3.71, "learning_rate": 3.799333333333333e-07, "loss": 3.8632, "step": 176000 }, { "epoch": 3.71, "eval_loss": 3.6960248947143555, "eval_runtime": 41.3044, "eval_samples_per_second": 968.42, "eval_steps_per_second": 60.526, "step": 176000 }, { "epoch": 3.87, "eval_loss": 3.7202460765838623, "eval_runtime": 40.9217, "eval_samples_per_second": 977.476, "eval_steps_per_second": 61.092, "step": 184000 }, { "epoch": 4.04, "learning_rate": 3.772e-07, "loss": 3.8416, "step": 192000 }, { "epoch": 4.04, "eval_loss": 3.7108640670776367, "eval_runtime": 40.749, "eval_samples_per_second": 981.619, "eval_steps_per_second": 61.351, "step": 192000 }, { "epoch": 4.21, "eval_loss": 3.694209098815918, "eval_runtime": 40.7078, "eval_samples_per_second": 982.612, "eval_steps_per_second": 61.413, "step": 200000 }, { "epoch": 4.38, "learning_rate": 3.7446666666666667e-07, "loss": 3.8368, "step": 208000 }, { "epoch": 4.38, "eval_loss": 3.6943516731262207, "eval_runtime": 40.8183, "eval_samples_per_second": 979.952, "eval_steps_per_second": 61.247, "step": 208000 }, { "epoch": 4.55, "eval_loss": 3.6750781536102295, "eval_runtime": 40.7333, "eval_samples_per_second": 981.999, "eval_steps_per_second": 61.375, "step": 216000 }, { "epoch": 4.72, "learning_rate": 3.7173333333333333e-07, "loss": 3.8359, "step": 224000 }, { "epoch": 4.72, "eval_loss": 3.6815297603607178, "eval_runtime": 40.6706, "eval_samples_per_second": 983.512, "eval_steps_per_second": 61.47, "step": 224000 }, { "epoch": 4.88, "eval_loss": 3.6914784908294678, "eval_runtime": 40.9289, "eval_samples_per_second": 977.304, "eval_steps_per_second": 61.082, "step": 232000 }, { "epoch": 5.05, "learning_rate": 3.69e-07, "loss": 3.8411, "step": 240000 }, { "epoch": 5.05, "eval_loss": 3.67960786819458, "eval_runtime": 40.7871, "eval_samples_per_second": 980.701, "eval_steps_per_second": 61.294, "step": 240000 }, { "epoch": 5.22, "eval_loss": 3.684703826904297, "eval_runtime": 40.8269, "eval_samples_per_second": 979.745, "eval_steps_per_second": 61.234, "step": 248000 }, { "epoch": 5.39, "learning_rate": 3.6626666666666664e-07, "loss": 3.8359, "step": 256000 }, { "epoch": 5.39, "eval_loss": 3.6988320350646973, "eval_runtime": 40.7811, "eval_samples_per_second": 980.847, "eval_steps_per_second": 61.303, "step": 256000 }, { "epoch": 5.56, "eval_loss": 3.679949998855591, "eval_runtime": 40.9831, "eval_samples_per_second": 976.011, "eval_steps_per_second": 61.001, "step": 264000 }, { "epoch": 5.73, "learning_rate": 3.6353333333333335e-07, "loss": 3.8268, "step": 272000 }, { "epoch": 5.73, "eval_loss": 3.681049108505249, "eval_runtime": 40.7829, "eval_samples_per_second": 980.804, "eval_steps_per_second": 61.3, "step": 272000 }, { "epoch": 5.89, "eval_loss": 3.66385817527771, "eval_runtime": 40.8484, "eval_samples_per_second": 979.23, "eval_steps_per_second": 61.202, "step": 280000 }, { "epoch": 6.06, "learning_rate": 3.608e-07, "loss": 3.8172, "step": 288000 }, { "epoch": 6.06, "eval_loss": 3.666334390640259, "eval_runtime": 41.3519, "eval_samples_per_second": 967.307, "eval_steps_per_second": 60.457, "step": 288000 }, { "epoch": 6.23, "eval_loss": 3.6838061809539795, "eval_runtime": 40.9071, "eval_samples_per_second": 977.825, "eval_steps_per_second": 61.114, "step": 296000 }, { "epoch": 6.4, "learning_rate": 3.5806666666666666e-07, "loss": 3.8263, "step": 304000 }, { "epoch": 6.4, "eval_loss": 3.6755523681640625, "eval_runtime": 41.0373, "eval_samples_per_second": 974.723, "eval_steps_per_second": 60.92, "step": 304000 }, { "epoch": 6.57, "eval_loss": 3.650726556777954, "eval_runtime": 40.8546, "eval_samples_per_second": 979.082, "eval_steps_per_second": 61.193, "step": 312000 }, { "epoch": 6.74, "learning_rate": 3.553333333333333e-07, "loss": 3.8215, "step": 320000 }, { "epoch": 6.74, "eval_loss": 3.640876054763794, "eval_runtime": 40.7809, "eval_samples_per_second": 980.851, "eval_steps_per_second": 61.303, "step": 320000 }, { "epoch": 6.91, "eval_loss": 3.678966760635376, "eval_runtime": 41.0811, "eval_samples_per_second": 973.684, "eval_steps_per_second": 60.855, "step": 328000 }, { "epoch": 7.07, "learning_rate": 3.5259999999999997e-07, "loss": 3.8189, "step": 336000 }, { "epoch": 7.07, "eval_loss": 3.6679444313049316, "eval_runtime": 40.9298, "eval_samples_per_second": 977.283, "eval_steps_per_second": 61.08, "step": 336000 }, { "epoch": 7.24, "eval_loss": 3.6443188190460205, "eval_runtime": 41.2575, "eval_samples_per_second": 969.52, "eval_steps_per_second": 60.595, "step": 344000 }, { "epoch": 7.41, "learning_rate": 3.498666666666667e-07, "loss": 3.8155, "step": 352000 }, { "epoch": 7.41, "eval_loss": 3.658766746520996, "eval_runtime": 40.7405, "eval_samples_per_second": 981.824, "eval_steps_per_second": 61.364, "step": 352000 }, { "epoch": 7.58, "eval_loss": 3.644803524017334, "eval_runtime": 40.9906, "eval_samples_per_second": 975.834, "eval_steps_per_second": 60.99, "step": 360000 }, { "epoch": 7.75, "learning_rate": 3.4713333333333333e-07, "loss": 3.8075, "step": 368000 }, { "epoch": 7.75, "eval_loss": 3.651991367340088, "eval_runtime": 40.88, "eval_samples_per_second": 978.474, "eval_steps_per_second": 61.155, "step": 368000 }, { "epoch": 7.92, "eval_loss": 3.654082775115967, "eval_runtime": 40.8852, "eval_samples_per_second": 978.348, "eval_steps_per_second": 61.147, "step": 376000 }, { "epoch": 8.08, "learning_rate": 3.444e-07, "loss": 3.8064, "step": 384000 }, { "epoch": 8.08, "eval_loss": 3.656877279281616, "eval_runtime": 40.8714, "eval_samples_per_second": 978.679, "eval_steps_per_second": 61.167, "step": 384000 }, { "epoch": 8.25, "eval_loss": 3.6586239337921143, "eval_runtime": 40.7353, "eval_samples_per_second": 981.949, "eval_steps_per_second": 61.372, "step": 392000 }, { "epoch": 8.42, "learning_rate": 3.416666666666667e-07, "loss": 3.8092, "step": 400000 }, { "epoch": 8.42, "eval_loss": 3.670098066329956, "eval_runtime": 41.244, "eval_samples_per_second": 969.839, "eval_steps_per_second": 60.615, "step": 400000 }, { "epoch": 8.59, "eval_loss": 3.6543703079223633, "eval_runtime": 40.8099, "eval_samples_per_second": 980.155, "eval_steps_per_second": 61.26, "step": 408000 }, { "epoch": 8.76, "learning_rate": 3.3893333333333335e-07, "loss": 3.8032, "step": 416000 }, { "epoch": 8.76, "eval_loss": 3.6668190956115723, "eval_runtime": 40.824, "eval_samples_per_second": 979.816, "eval_steps_per_second": 61.238, "step": 416000 }, { "epoch": 8.93, "eval_loss": 3.663094997406006, "eval_runtime": 40.8379, "eval_samples_per_second": 979.482, "eval_steps_per_second": 61.218, "step": 424000 }, { "epoch": 9.09, "learning_rate": 3.3619999999999995e-07, "loss": 3.8062, "step": 432000 }, { "epoch": 9.09, "eval_loss": 3.6481242179870605, "eval_runtime": 40.8688, "eval_samples_per_second": 978.741, "eval_steps_per_second": 61.171, "step": 432000 }, { "epoch": 9.26, "eval_loss": 3.6392152309417725, "eval_runtime": 40.7769, "eval_samples_per_second": 980.949, "eval_steps_per_second": 61.309, "step": 440000 }, { "epoch": 9.43, "learning_rate": 3.3346666666666666e-07, "loss": 3.7987, "step": 448000 }, { "epoch": 9.43, "eval_loss": 3.6482295989990234, "eval_runtime": 40.7877, "eval_samples_per_second": 980.689, "eval_steps_per_second": 61.293, "step": 448000 }, { "epoch": 9.6, "eval_loss": 3.635684013366699, "eval_runtime": 40.7447, "eval_samples_per_second": 981.723, "eval_steps_per_second": 61.358, "step": 456000 }, { "epoch": 9.77, "learning_rate": 3.307333333333333e-07, "loss": 3.7954, "step": 464000 }, { "epoch": 9.77, "eval_loss": 3.633270263671875, "eval_runtime": 40.9048, "eval_samples_per_second": 977.881, "eval_steps_per_second": 61.118, "step": 464000 }, { "epoch": 9.94, "eval_loss": 3.6653008460998535, "eval_runtime": 40.6738, "eval_samples_per_second": 983.433, "eval_steps_per_second": 61.465, "step": 472000 }, { "epoch": 10.11, "learning_rate": 3.28e-07, "loss": 3.7938, "step": 480000 }, { "epoch": 10.11, "eval_loss": 3.6266889572143555, "eval_runtime": 40.7255, "eval_samples_per_second": 982.185, "eval_steps_per_second": 61.387, "step": 480000 }, { "epoch": 10.27, "eval_loss": 3.6489765644073486, "eval_runtime": 40.8291, "eval_samples_per_second": 979.693, "eval_steps_per_second": 61.231, "step": 488000 }, { "epoch": 10.44, "learning_rate": 3.252666666666667e-07, "loss": 3.7901, "step": 496000 }, { "epoch": 10.44, "eval_loss": 3.641712188720703, "eval_runtime": 41.0566, "eval_samples_per_second": 974.264, "eval_steps_per_second": 60.892, "step": 496000 }, { "epoch": 10.61, "eval_loss": 3.6263108253479004, "eval_runtime": 40.7345, "eval_samples_per_second": 981.968, "eval_steps_per_second": 61.373, "step": 504000 }, { "epoch": 10.78, "learning_rate": 3.2253333333333334e-07, "loss": 3.7935, "step": 512000 }, { "epoch": 10.78, "eval_loss": 3.6522746086120605, "eval_runtime": 40.7085, "eval_samples_per_second": 982.595, "eval_steps_per_second": 61.412, "step": 512000 }, { "epoch": 10.95, "eval_loss": 3.6444039344787598, "eval_runtime": 40.8183, "eval_samples_per_second": 979.953, "eval_steps_per_second": 61.247, "step": 520000 }, { "epoch": 11.12, "learning_rate": 3.198e-07, "loss": 3.7951, "step": 528000 }, { "epoch": 11.12, "eval_loss": 3.622567892074585, "eval_runtime": 40.7606, "eval_samples_per_second": 981.339, "eval_steps_per_second": 61.334, "step": 528000 }, { "epoch": 11.28, "eval_loss": 3.6346988677978516, "eval_runtime": 40.6211, "eval_samples_per_second": 984.71, "eval_steps_per_second": 61.544, "step": 536000 }, { "epoch": 11.45, "learning_rate": 3.1706666666666665e-07, "loss": 3.7861, "step": 544000 }, { "epoch": 11.45, "eval_loss": 3.637178421020508, "eval_runtime": 40.7751, "eval_samples_per_second": 980.991, "eval_steps_per_second": 61.312, "step": 544000 }, { "epoch": 11.62, "eval_loss": 3.6162924766540527, "eval_runtime": 40.7193, "eval_samples_per_second": 982.336, "eval_steps_per_second": 61.396, "step": 552000 }, { "epoch": 11.79, "learning_rate": 3.1433333333333336e-07, "loss": 3.7846, "step": 560000 }, { "epoch": 11.79, "eval_loss": 3.6299352645874023, "eval_runtime": 40.8861, "eval_samples_per_second": 978.327, "eval_steps_per_second": 61.145, "step": 560000 }, { "epoch": 11.96, "eval_loss": 3.6330039501190186, "eval_runtime": 40.827, "eval_samples_per_second": 979.744, "eval_steps_per_second": 61.234, "step": 568000 }, { "epoch": 12.13, "learning_rate": 3.116e-07, "loss": 3.7778, "step": 576000 }, { "epoch": 12.13, "eval_loss": 3.637084484100342, "eval_runtime": 40.7594, "eval_samples_per_second": 981.368, "eval_steps_per_second": 61.335, "step": 576000 }, { "epoch": 12.29, "eval_loss": 3.634265661239624, "eval_runtime": 40.9272, "eval_samples_per_second": 977.346, "eval_steps_per_second": 61.084, "step": 584000 }, { "epoch": 12.46, "learning_rate": 3.0886666666666667e-07, "loss": 3.777, "step": 592000 }, { "epoch": 12.46, "eval_loss": 3.62420392036438, "eval_runtime": 40.8724, "eval_samples_per_second": 978.655, "eval_steps_per_second": 61.166, "step": 592000 }, { "epoch": 12.63, "eval_loss": 3.6119399070739746, "eval_runtime": 40.9368, "eval_samples_per_second": 977.116, "eval_steps_per_second": 61.07, "step": 600000 }, { "epoch": 12.8, "learning_rate": 3.061333333333333e-07, "loss": 3.778, "step": 608000 }, { "epoch": 12.8, "eval_loss": 3.6167094707489014, "eval_runtime": 40.959, "eval_samples_per_second": 976.587, "eval_steps_per_second": 61.037, "step": 608000 }, { "epoch": 12.97, "eval_loss": 3.6191112995147705, "eval_runtime": 41.1386, "eval_samples_per_second": 972.322, "eval_steps_per_second": 60.77, "step": 616000 }, { "epoch": 13.14, "learning_rate": 3.034e-07, "loss": 3.7795, "step": 624000 }, { "epoch": 13.14, "eval_loss": 3.622523546218872, "eval_runtime": 40.9825, "eval_samples_per_second": 976.027, "eval_steps_per_second": 61.002, "step": 624000 }, { "epoch": 13.3, "eval_loss": 3.605618476867676, "eval_runtime": 41.1305, "eval_samples_per_second": 972.515, "eval_steps_per_second": 60.782, "step": 632000 }, { "epoch": 13.47, "learning_rate": 3.0066666666666663e-07, "loss": 3.7766, "step": 640000 }, { "epoch": 13.47, "eval_loss": 3.613523483276367, "eval_runtime": 41.0605, "eval_samples_per_second": 974.172, "eval_steps_per_second": 60.886, "step": 640000 }, { "epoch": 13.64, "eval_loss": 3.6168975830078125, "eval_runtime": 41.0389, "eval_samples_per_second": 974.684, "eval_steps_per_second": 60.918, "step": 648000 }, { "epoch": 13.81, "learning_rate": 2.9793333333333334e-07, "loss": 3.7729, "step": 656000 }, { "epoch": 13.81, "eval_loss": 3.6035475730895996, "eval_runtime": 40.9659, "eval_samples_per_second": 976.423, "eval_steps_per_second": 61.026, "step": 656000 }, { "epoch": 13.98, "eval_loss": 3.6109204292297363, "eval_runtime": 40.9828, "eval_samples_per_second": 976.02, "eval_steps_per_second": 61.001, "step": 664000 }, { "epoch": 14.15, "learning_rate": 2.952e-07, "loss": 3.7846, "step": 672000 }, { "epoch": 14.15, "eval_loss": 3.617966413497925, "eval_runtime": 40.9537, "eval_samples_per_second": 976.714, "eval_steps_per_second": 61.045, "step": 672000 }, { "epoch": 14.32, "eval_loss": 3.6171200275421143, "eval_runtime": 40.9901, "eval_samples_per_second": 975.845, "eval_steps_per_second": 60.99, "step": 680000 }, { "epoch": 14.48, "learning_rate": 2.9246666666666665e-07, "loss": 3.7726, "step": 688000 }, { "epoch": 14.48, "eval_loss": 3.6182472705841064, "eval_runtime": 41.0924, "eval_samples_per_second": 973.416, "eval_steps_per_second": 60.838, "step": 688000 }, { "epoch": 14.65, "eval_loss": 3.608560085296631, "eval_runtime": 40.9759, "eval_samples_per_second": 976.185, "eval_steps_per_second": 61.012, "step": 696000 }, { "epoch": 14.82, "learning_rate": 2.897333333333333e-07, "loss": 3.7717, "step": 704000 }, { "epoch": 14.82, "eval_loss": 3.585219621658325, "eval_runtime": 41.0174, "eval_samples_per_second": 975.197, "eval_steps_per_second": 60.95, "step": 704000 }, { "epoch": 14.99, "eval_loss": 3.5882859230041504, "eval_runtime": 40.9445, "eval_samples_per_second": 976.932, "eval_steps_per_second": 61.058, "step": 712000 }, { "epoch": 15.16, "learning_rate": 2.8699999999999996e-07, "loss": 3.7713, "step": 720000 }, { "epoch": 15.16, "eval_loss": 3.605559825897217, "eval_runtime": 40.8496, "eval_samples_per_second": 979.203, "eval_steps_per_second": 61.2, "step": 720000 }, { "epoch": 15.33, "eval_loss": 3.6003661155700684, "eval_runtime": 40.9885, "eval_samples_per_second": 975.883, "eval_steps_per_second": 60.993, "step": 728000 }, { "epoch": 15.49, "learning_rate": 2.8426666666666667e-07, "loss": 3.7745, "step": 736000 }, { "epoch": 15.49, "eval_loss": 3.605851888656616, "eval_runtime": 40.9895, "eval_samples_per_second": 975.86, "eval_steps_per_second": 60.991, "step": 736000 }, { "epoch": 15.66, "eval_loss": 3.615556240081787, "eval_runtime": 41.0234, "eval_samples_per_second": 975.053, "eval_steps_per_second": 60.941, "step": 744000 }, { "epoch": 15.83, "learning_rate": 2.815333333333333e-07, "loss": 3.7557, "step": 752000 }, { "epoch": 15.83, "eval_loss": 3.6028969287872314, "eval_runtime": 41.0782, "eval_samples_per_second": 973.753, "eval_steps_per_second": 60.86, "step": 752000 }, { "epoch": 16.0, "eval_loss": 3.609947681427002, "eval_runtime": 41.5445, "eval_samples_per_second": 962.823, "eval_steps_per_second": 60.176, "step": 760000 }, { "epoch": 16.17, "learning_rate": 2.7880000000000003e-07, "loss": 3.7628, "step": 768000 }, { "epoch": 16.17, "eval_loss": 3.601588249206543, "eval_runtime": 41.0762, "eval_samples_per_second": 973.8, "eval_steps_per_second": 60.863, "step": 768000 }, { "epoch": 16.34, "eval_loss": 3.6008121967315674, "eval_runtime": 41.0463, "eval_samples_per_second": 974.508, "eval_steps_per_second": 60.907, "step": 776000 }, { "epoch": 16.5, "learning_rate": 2.7606666666666664e-07, "loss": 3.7717, "step": 784000 }, { "epoch": 16.5, "eval_loss": 3.597151517868042, "eval_runtime": 40.9425, "eval_samples_per_second": 976.979, "eval_steps_per_second": 61.061, "step": 784000 }, { "epoch": 16.67, "eval_loss": 3.5838093757629395, "eval_runtime": 40.8764, "eval_samples_per_second": 978.56, "eval_steps_per_second": 61.16, "step": 792000 }, { "epoch": 16.84, "learning_rate": 2.733333333333333e-07, "loss": 3.7616, "step": 800000 }, { "epoch": 16.84, "eval_loss": 3.5868074893951416, "eval_runtime": 40.7672, "eval_samples_per_second": 981.181, "eval_steps_per_second": 61.324, "step": 800000 }, { "epoch": 17.01, "eval_loss": 3.5833964347839355, "eval_runtime": 40.8806, "eval_samples_per_second": 978.459, "eval_steps_per_second": 61.154, "step": 808000 }, { "epoch": 17.18, "learning_rate": 2.706e-07, "loss": 3.7608, "step": 816000 }, { "epoch": 17.18, "eval_loss": 3.6065828800201416, "eval_runtime": 40.6947, "eval_samples_per_second": 982.928, "eval_steps_per_second": 61.433, "step": 816000 }, { "epoch": 17.35, "eval_loss": 3.591146469116211, "eval_runtime": 40.6695, "eval_samples_per_second": 983.539, "eval_steps_per_second": 61.471, "step": 824000 }, { "epoch": 17.52, "learning_rate": 2.6786666666666666e-07, "loss": 3.7625, "step": 832000 }, { "epoch": 17.52, "eval_loss": 3.5996882915496826, "eval_runtime": 40.5695, "eval_samples_per_second": 985.963, "eval_steps_per_second": 61.623, "step": 832000 }, { "epoch": 17.68, "eval_loss": 3.585501194000244, "eval_runtime": 40.7388, "eval_samples_per_second": 981.864, "eval_steps_per_second": 61.366, "step": 840000 }, { "epoch": 17.85, "learning_rate": 2.651333333333333e-07, "loss": 3.7634, "step": 848000 }, { "epoch": 17.85, "eval_loss": 3.5861265659332275, "eval_runtime": 40.647, "eval_samples_per_second": 984.082, "eval_steps_per_second": 61.505, "step": 848000 }, { "epoch": 18.02, "eval_loss": 3.6021432876586914, "eval_runtime": 40.6989, "eval_samples_per_second": 982.827, "eval_steps_per_second": 61.427, "step": 856000 }, { "epoch": 18.19, "learning_rate": 2.624e-07, "loss": 3.75, "step": 864000 }, { "epoch": 18.19, "eval_loss": 3.59663987159729, "eval_runtime": 40.7681, "eval_samples_per_second": 981.16, "eval_steps_per_second": 61.322, "step": 864000 }, { "epoch": 18.36, "eval_loss": 3.5761475563049316, "eval_runtime": 40.7529, "eval_samples_per_second": 981.526, "eval_steps_per_second": 61.345, "step": 872000 }, { "epoch": 18.53, "learning_rate": 2.596666666666667e-07, "loss": 3.7492, "step": 880000 }, { "epoch": 18.53, "eval_loss": 3.5757410526275635, "eval_runtime": 40.651, "eval_samples_per_second": 983.986, "eval_steps_per_second": 61.499, "step": 880000 }, { "epoch": 18.69, "eval_loss": 3.6122772693634033, "eval_runtime": 40.9703, "eval_samples_per_second": 976.316, "eval_steps_per_second": 61.02, "step": 888000 }, { "epoch": 18.86, "learning_rate": 2.5693333333333333e-07, "loss": 3.7522, "step": 896000 }, { "epoch": 18.86, "eval_loss": 3.584080934524536, "eval_runtime": 40.6825, "eval_samples_per_second": 983.223, "eval_steps_per_second": 61.451, "step": 896000 }, { "epoch": 19.03, "eval_loss": 3.5830750465393066, "eval_runtime": 40.7667, "eval_samples_per_second": 981.193, "eval_steps_per_second": 61.325, "step": 904000 }, { "epoch": 19.2, "learning_rate": 2.542e-07, "loss": 3.7482, "step": 912000 }, { "epoch": 19.2, "eval_loss": 3.5859768390655518, "eval_runtime": 40.7841, "eval_samples_per_second": 980.776, "eval_steps_per_second": 61.298, "step": 912000 }, { "epoch": 19.37, "eval_loss": 3.580415964126587, "eval_runtime": 40.6458, "eval_samples_per_second": 984.112, "eval_steps_per_second": 61.507, "step": 920000 }, { "epoch": 19.54, "learning_rate": 2.5146666666666664e-07, "loss": 3.75, "step": 928000 }, { "epoch": 19.54, "eval_loss": 3.573030471801758, "eval_runtime": 40.6766, "eval_samples_per_second": 983.365, "eval_steps_per_second": 61.46, "step": 928000 }, { "epoch": 19.7, "eval_loss": 3.595536708831787, "eval_runtime": 40.7802, "eval_samples_per_second": 980.869, "eval_steps_per_second": 61.304, "step": 936000 }, { "epoch": 19.87, "learning_rate": 2.4873333333333335e-07, "loss": 3.755, "step": 944000 }, { "epoch": 19.87, "eval_loss": 3.58677077293396, "eval_runtime": 40.6611, "eval_samples_per_second": 983.74, "eval_steps_per_second": 61.484, "step": 944000 }, { "epoch": 20.04, "eval_loss": 3.5992047786712646, "eval_runtime": 40.7082, "eval_samples_per_second": 982.602, "eval_steps_per_second": 61.413, "step": 952000 }, { "epoch": 20.21, "learning_rate": 2.46e-07, "loss": 3.7549, "step": 960000 }, { "epoch": 20.21, "eval_loss": 3.565748929977417, "eval_runtime": 40.7168, "eval_samples_per_second": 982.395, "eval_steps_per_second": 61.4, "step": 960000 }, { "epoch": 20.38, "eval_loss": 3.5780434608459473, "eval_runtime": 40.7077, "eval_samples_per_second": 982.614, "eval_steps_per_second": 61.413, "step": 968000 }, { "epoch": 20.55, "learning_rate": 2.4326666666666666e-07, "loss": 3.743, "step": 976000 }, { "epoch": 20.55, "eval_loss": 3.5827953815460205, "eval_runtime": 41.0389, "eval_samples_per_second": 974.684, "eval_steps_per_second": 60.918, "step": 976000 }, { "epoch": 20.72, "eval_loss": 3.5676393508911133, "eval_runtime": 40.6587, "eval_samples_per_second": 983.798, "eval_steps_per_second": 61.487, "step": 984000 }, { "epoch": 20.88, "learning_rate": 2.405333333333333e-07, "loss": 3.75, "step": 992000 }, { "epoch": 20.88, "eval_loss": 3.5723633766174316, "eval_runtime": 40.664, "eval_samples_per_second": 983.671, "eval_steps_per_second": 61.479, "step": 992000 }, { "epoch": 21.05, "eval_loss": 3.5849897861480713, "eval_runtime": 40.9978, "eval_samples_per_second": 975.663, "eval_steps_per_second": 60.979, "step": 1000000 }, { "epoch": 21.22, "learning_rate": 2.3779999999999997e-07, "loss": 3.7483, "step": 1008000 }, { "epoch": 21.22, "eval_loss": 3.5873172283172607, "eval_runtime": 40.6772, "eval_samples_per_second": 983.351, "eval_steps_per_second": 61.459, "step": 1008000 }, { "epoch": 21.39, "eval_loss": 3.579916000366211, "eval_runtime": 40.7474, "eval_samples_per_second": 981.657, "eval_steps_per_second": 61.354, "step": 1016000 }, { "epoch": 21.56, "learning_rate": 2.3506666666666668e-07, "loss": 3.7523, "step": 1024000 }, { "epoch": 21.56, "eval_loss": 3.5973777770996094, "eval_runtime": 40.6799, "eval_samples_per_second": 983.285, "eval_steps_per_second": 61.455, "step": 1024000 }, { "epoch": 21.73, "eval_loss": 3.5790483951568604, "eval_runtime": 40.6889, "eval_samples_per_second": 983.069, "eval_steps_per_second": 61.442, "step": 1032000 }, { "epoch": 21.89, "learning_rate": 2.3233333333333334e-07, "loss": 3.7458, "step": 1040000 }, { "epoch": 21.89, "eval_loss": 3.5884082317352295, "eval_runtime": 40.7901, "eval_samples_per_second": 980.63, "eval_steps_per_second": 61.289, "step": 1040000 }, { "epoch": 22.06, "eval_loss": 3.590412139892578, "eval_runtime": 40.7466, "eval_samples_per_second": 981.678, "eval_steps_per_second": 61.355, "step": 1048000 }, { "epoch": 22.23, "learning_rate": 2.2960000000000002e-07, "loss": 3.7498, "step": 1056000 }, { "epoch": 22.23, "eval_loss": 3.5850796699523926, "eval_runtime": 40.7799, "eval_samples_per_second": 980.876, "eval_steps_per_second": 61.305, "step": 1056000 }, { "epoch": 22.4, "eval_loss": 3.5775773525238037, "eval_runtime": 40.7308, "eval_samples_per_second": 982.058, "eval_steps_per_second": 61.379, "step": 1064000 }, { "epoch": 22.57, "learning_rate": 2.2686666666666667e-07, "loss": 3.7496, "step": 1072000 }, { "epoch": 22.57, "eval_loss": 3.5685038566589355, "eval_runtime": 40.7046, "eval_samples_per_second": 982.691, "eval_steps_per_second": 61.418, "step": 1072000 }, { "epoch": 22.74, "eval_loss": 3.5731077194213867, "eval_runtime": 40.7653, "eval_samples_per_second": 981.226, "eval_steps_per_second": 61.327, "step": 1080000 }, { "epoch": 22.9, "learning_rate": 2.2413333333333333e-07, "loss": 3.7395, "step": 1088000 }, { "epoch": 22.9, "eval_loss": 3.5858407020568848, "eval_runtime": 40.7416, "eval_samples_per_second": 981.799, "eval_steps_per_second": 61.362, "step": 1088000 }, { "epoch": 23.07, "eval_loss": 3.593097686767578, "eval_runtime": 40.8353, "eval_samples_per_second": 979.546, "eval_steps_per_second": 61.222, "step": 1096000 }, { "epoch": 23.24, "learning_rate": 2.214e-07, "loss": 3.7466, "step": 1104000 }, { "epoch": 23.24, "eval_loss": 3.561359405517578, "eval_runtime": 40.8333, "eval_samples_per_second": 979.593, "eval_steps_per_second": 61.225, "step": 1104000 }, { "epoch": 23.41, "eval_loss": 3.54563045501709, "eval_runtime": 40.8237, "eval_samples_per_second": 979.822, "eval_steps_per_second": 61.239, "step": 1112000 }, { "epoch": 23.58, "learning_rate": 2.1866666666666667e-07, "loss": 3.7503, "step": 1120000 }, { "epoch": 23.58, "eval_loss": 3.5894973278045654, "eval_runtime": 40.774, "eval_samples_per_second": 981.017, "eval_steps_per_second": 61.314, "step": 1120000 }, { "epoch": 23.75, "eval_loss": 3.560800075531006, "eval_runtime": 40.772, "eval_samples_per_second": 981.064, "eval_steps_per_second": 61.317, "step": 1128000 }, { "epoch": 23.92, "learning_rate": 2.1593333333333332e-07, "loss": 3.7484, "step": 1136000 }, { "epoch": 23.92, "eval_loss": 3.569559335708618, "eval_runtime": 40.8573, "eval_samples_per_second": 979.016, "eval_steps_per_second": 61.189, "step": 1136000 }, { "epoch": 24.08, "eval_loss": 3.5653023719787598, "eval_runtime": 40.8073, "eval_samples_per_second": 980.216, "eval_steps_per_second": 61.264, "step": 1144000 }, { "epoch": 24.25, "learning_rate": 2.132e-07, "loss": 3.7435, "step": 1152000 }, { "epoch": 24.25, "eval_loss": 3.5721492767333984, "eval_runtime": 41.2214, "eval_samples_per_second": 970.369, "eval_steps_per_second": 60.648, "step": 1152000 }, { "epoch": 24.42, "eval_loss": 3.5510270595550537, "eval_runtime": 40.8174, "eval_samples_per_second": 979.974, "eval_steps_per_second": 61.248, "step": 1160000 }, { "epoch": 24.59, "learning_rate": 2.1046666666666666e-07, "loss": 3.7348, "step": 1168000 }, { "epoch": 24.59, "eval_loss": 3.5631327629089355, "eval_runtime": 40.9539, "eval_samples_per_second": 976.708, "eval_steps_per_second": 61.044, "step": 1168000 }, { "epoch": 24.76, "eval_loss": 3.5727241039276123, "eval_runtime": 40.9289, "eval_samples_per_second": 977.305, "eval_steps_per_second": 61.082, "step": 1176000 }, { "epoch": 24.93, "learning_rate": 2.0773333333333334e-07, "loss": 3.7341, "step": 1184000 }, { "epoch": 24.93, "eval_loss": 3.5835182666778564, "eval_runtime": 40.8074, "eval_samples_per_second": 980.214, "eval_steps_per_second": 61.263, "step": 1184000 }, { "epoch": 25.09, "eval_loss": 3.5766148567199707, "eval_runtime": 40.8582, "eval_samples_per_second": 978.995, "eval_steps_per_second": 61.187, "step": 1192000 }, { "epoch": 25.26, "learning_rate": 2.05e-07, "loss": 3.7435, "step": 1200000 }, { "epoch": 25.26, "eval_loss": 3.560559034347534, "eval_runtime": 40.757, "eval_samples_per_second": 981.427, "eval_steps_per_second": 61.339, "step": 1200000 }, { "epoch": 25.43, "eval_loss": 3.5497019290924072, "eval_runtime": 40.8866, "eval_samples_per_second": 978.316, "eval_steps_per_second": 61.145, "step": 1208000 }, { "epoch": 25.6, "learning_rate": 2.0226666666666668e-07, "loss": 3.732, "step": 1216000 }, { "epoch": 25.6, "eval_loss": 3.543264150619507, "eval_runtime": 41.019, "eval_samples_per_second": 975.158, "eval_steps_per_second": 60.947, "step": 1216000 }, { "epoch": 25.77, "eval_loss": 3.5419702529907227, "eval_runtime": 40.8947, "eval_samples_per_second": 978.121, "eval_steps_per_second": 61.133, "step": 1224000 }, { "epoch": 25.94, "learning_rate": 1.9953333333333333e-07, "loss": 3.7343, "step": 1232000 }, { "epoch": 25.94, "eval_loss": 3.598745346069336, "eval_runtime": 40.7705, "eval_samples_per_second": 981.101, "eval_steps_per_second": 61.319, "step": 1232000 }, { "epoch": 26.1, "eval_loss": 3.5955729484558105, "eval_runtime": 40.7487, "eval_samples_per_second": 981.626, "eval_steps_per_second": 61.352, "step": 1240000 }, { "epoch": 26.27, "learning_rate": 1.968e-07, "loss": 3.7336, "step": 1248000 }, { "epoch": 26.27, "eval_loss": 3.56730580329895, "eval_runtime": 41.2363, "eval_samples_per_second": 970.019, "eval_steps_per_second": 60.626, "step": 1248000 }, { "epoch": 26.44, "eval_loss": 3.564300775527954, "eval_runtime": 40.7963, "eval_samples_per_second": 980.481, "eval_steps_per_second": 61.28, "step": 1256000 }, { "epoch": 26.61, "learning_rate": 1.9406666666666667e-07, "loss": 3.7444, "step": 1264000 }, { "epoch": 26.61, "eval_loss": 3.5847718715667725, "eval_runtime": 40.905, "eval_samples_per_second": 977.875, "eval_steps_per_second": 61.117, "step": 1264000 }, { "epoch": 26.78, "eval_loss": 3.5693116188049316, "eval_runtime": 40.875, "eval_samples_per_second": 978.593, "eval_steps_per_second": 61.162, "step": 1272000 }, { "epoch": 26.95, "learning_rate": 1.9133333333333333e-07, "loss": 3.7395, "step": 1280000 }, { "epoch": 26.95, "eval_loss": 3.574453830718994, "eval_runtime": 40.829, "eval_samples_per_second": 979.695, "eval_steps_per_second": 61.231, "step": 1280000 }, { "epoch": 27.12, "eval_loss": 3.5757510662078857, "eval_runtime": 41.0564, "eval_samples_per_second": 974.269, "eval_steps_per_second": 60.892, "step": 1288000 }, { "epoch": 27.28, "learning_rate": 1.886e-07, "loss": 3.7389, "step": 1296000 }, { "epoch": 27.28, "eval_loss": 3.568530559539795, "eval_runtime": 40.9717, "eval_samples_per_second": 976.284, "eval_steps_per_second": 61.018, "step": 1296000 }, { "epoch": 27.45, "eval_loss": 3.5712063312530518, "eval_runtime": 41.0295, "eval_samples_per_second": 974.909, "eval_steps_per_second": 60.932, "step": 1304000 }, { "epoch": 27.62, "learning_rate": 1.8586666666666666e-07, "loss": 3.7416, "step": 1312000 }, { "epoch": 27.62, "eval_loss": 3.5692648887634277, "eval_runtime": 41.0883, "eval_samples_per_second": 973.513, "eval_steps_per_second": 60.845, "step": 1312000 }, { "epoch": 27.79, "eval_loss": 3.574037790298462, "eval_runtime": 41.0301, "eval_samples_per_second": 974.894, "eval_steps_per_second": 60.931, "step": 1320000 }, { "epoch": 27.96, "learning_rate": 1.8313333333333332e-07, "loss": 3.7305, "step": 1328000 }, { "epoch": 27.96, "eval_loss": 3.580286741256714, "eval_runtime": 41.1151, "eval_samples_per_second": 972.878, "eval_steps_per_second": 60.805, "step": 1328000 }, { "epoch": 28.13, "eval_loss": 3.5682218074798584, "eval_runtime": 41.0432, "eval_samples_per_second": 974.583, "eval_steps_per_second": 60.911, "step": 1336000 }, { "epoch": 28.29, "learning_rate": 1.804e-07, "loss": 3.7268, "step": 1344000 }, { "epoch": 28.29, "eval_loss": 3.592750310897827, "eval_runtime": 41.2516, "eval_samples_per_second": 969.659, "eval_steps_per_second": 60.604, "step": 1344000 }, { "epoch": 28.46, "eval_loss": 3.560762405395508, "eval_runtime": 41.1073, "eval_samples_per_second": 973.062, "eval_steps_per_second": 60.816, "step": 1352000 }, { "epoch": 28.63, "learning_rate": 1.7766666666666666e-07, "loss": 3.7363, "step": 1360000 }, { "epoch": 28.63, "eval_loss": 3.5586514472961426, "eval_runtime": 41.0403, "eval_samples_per_second": 974.651, "eval_steps_per_second": 60.916, "step": 1360000 }, { "epoch": 28.8, "eval_loss": 3.5602593421936035, "eval_runtime": 41.0821, "eval_samples_per_second": 973.659, "eval_steps_per_second": 60.854, "step": 1368000 }, { "epoch": 28.97, "learning_rate": 1.7493333333333334e-07, "loss": 3.7325, "step": 1376000 }, { "epoch": 28.97, "eval_loss": 3.5710933208465576, "eval_runtime": 40.9996, "eval_samples_per_second": 975.619, "eval_steps_per_second": 60.976, "step": 1376000 }, { "epoch": 29.14, "eval_loss": 3.5828020572662354, "eval_runtime": 41.0029, "eval_samples_per_second": 975.54, "eval_steps_per_second": 60.971, "step": 1384000 }, { "epoch": 29.3, "learning_rate": 1.722e-07, "loss": 3.7337, "step": 1392000 }, { "epoch": 29.3, "eval_loss": 3.5789852142333984, "eval_runtime": 40.9538, "eval_samples_per_second": 976.711, "eval_steps_per_second": 61.044, "step": 1392000 }, { "epoch": 29.47, "eval_loss": 3.5794663429260254, "eval_runtime": 41.0721, "eval_samples_per_second": 973.898, "eval_steps_per_second": 60.869, "step": 1400000 }, { "epoch": 29.64, "learning_rate": 1.6946666666666668e-07, "loss": 3.7367, "step": 1408000 }, { "epoch": 29.64, "eval_loss": 3.552762031555176, "eval_runtime": 41.0733, "eval_samples_per_second": 973.869, "eval_steps_per_second": 60.867, "step": 1408000 }, { "epoch": 29.81, "eval_loss": 3.5766358375549316, "eval_runtime": 41.1321, "eval_samples_per_second": 972.476, "eval_steps_per_second": 60.78, "step": 1416000 }, { "epoch": 29.98, "learning_rate": 1.6673333333333333e-07, "loss": 3.7313, "step": 1424000 }, { "epoch": 29.98, "eval_loss": 3.56103777885437, "eval_runtime": 41.1652, "eval_samples_per_second": 971.695, "eval_steps_per_second": 60.731, "step": 1424000 }, { "epoch": 30.15, "eval_loss": 3.5833914279937744, "eval_runtime": 41.1146, "eval_samples_per_second": 972.891, "eval_steps_per_second": 60.806, "step": 1432000 }, { "epoch": 30.32, "learning_rate": 1.64e-07, "loss": 3.7277, "step": 1440000 }, { "epoch": 30.32, "eval_loss": 3.554572582244873, "eval_runtime": 41.1393, "eval_samples_per_second": 972.307, "eval_steps_per_second": 60.769, "step": 1440000 }, { "epoch": 30.48, "eval_loss": 3.5534067153930664, "eval_runtime": 41.4231, "eval_samples_per_second": 965.644, "eval_steps_per_second": 60.353, "step": 1448000 }, { "epoch": 30.65, "learning_rate": 1.6126666666666667e-07, "loss": 3.7296, "step": 1456000 }, { "epoch": 30.65, "eval_loss": 3.564561605453491, "eval_runtime": 41.4067, "eval_samples_per_second": 966.026, "eval_steps_per_second": 60.377, "step": 1456000 }, { "epoch": 30.82, "eval_loss": 3.543602705001831, "eval_runtime": 41.1326, "eval_samples_per_second": 972.466, "eval_steps_per_second": 60.779, "step": 1464000 }, { "epoch": 30.99, "learning_rate": 1.5853333333333332e-07, "loss": 3.7411, "step": 1472000 }, { "epoch": 30.99, "eval_loss": 3.5777840614318848, "eval_runtime": 41.3406, "eval_samples_per_second": 967.572, "eval_steps_per_second": 60.473, "step": 1472000 }, { "epoch": 31.16, "eval_loss": 3.554126262664795, "eval_runtime": 41.1916, "eval_samples_per_second": 971.072, "eval_steps_per_second": 60.692, "step": 1480000 }, { "epoch": 31.33, "learning_rate": 1.558e-07, "loss": 3.7233, "step": 1488000 }, { "epoch": 31.33, "eval_loss": 3.5719780921936035, "eval_runtime": 41.1623, "eval_samples_per_second": 971.763, "eval_steps_per_second": 60.735, "step": 1488000 }, { "epoch": 31.49, "eval_loss": 3.5567288398742676, "eval_runtime": 41.1991, "eval_samples_per_second": 970.896, "eval_steps_per_second": 60.681, "step": 1496000 }, { "epoch": 31.66, "learning_rate": 1.5306666666666666e-07, "loss": 3.7291, "step": 1504000 }, { "epoch": 31.66, "eval_loss": 3.547672748565674, "eval_runtime": 41.202, "eval_samples_per_second": 970.826, "eval_steps_per_second": 60.677, "step": 1504000 }, { "epoch": 31.83, "eval_loss": 3.5557453632354736, "eval_runtime": 41.3088, "eval_samples_per_second": 968.316, "eval_steps_per_second": 60.52, "step": 1512000 }, { "epoch": 32.0, "learning_rate": 1.5033333333333332e-07, "loss": 3.7265, "step": 1520000 }, { "epoch": 32.0, "eval_loss": 3.564314842224121, "eval_runtime": 41.189, "eval_samples_per_second": 971.134, "eval_steps_per_second": 60.696, "step": 1520000 }, { "epoch": 32.17, "eval_loss": 3.5738565921783447, "eval_runtime": 41.1759, "eval_samples_per_second": 971.443, "eval_steps_per_second": 60.715, "step": 1528000 }, { "epoch": 32.34, "learning_rate": 1.476e-07, "loss": 3.7352, "step": 1536000 }, { "epoch": 32.34, "eval_loss": 3.562810182571411, "eval_runtime": 41.1629, "eval_samples_per_second": 971.75, "eval_steps_per_second": 60.734, "step": 1536000 }, { "epoch": 32.5, "eval_loss": 3.5542376041412354, "eval_runtime": 41.3509, "eval_samples_per_second": 967.331, "eval_steps_per_second": 60.458, "step": 1544000 }, { "epoch": 32.67, "learning_rate": 1.4486666666666665e-07, "loss": 3.7353, "step": 1552000 }, { "epoch": 32.67, "eval_loss": 3.549649715423584, "eval_runtime": 41.29, "eval_samples_per_second": 968.757, "eval_steps_per_second": 60.547, "step": 1552000 }, { "epoch": 32.84, "eval_loss": 3.5736968517303467, "eval_runtime": 41.3467, "eval_samples_per_second": 967.429, "eval_steps_per_second": 60.464, "step": 1560000 }, { "epoch": 33.01, "learning_rate": 1.4213333333333334e-07, "loss": 3.7243, "step": 1568000 }, { "epoch": 33.01, "eval_loss": 3.578824520111084, "eval_runtime": 41.2986, "eval_samples_per_second": 968.556, "eval_steps_per_second": 60.535, "step": 1568000 }, { "epoch": 33.18, "eval_loss": 3.5630581378936768, "eval_runtime": 41.302, "eval_samples_per_second": 968.476, "eval_steps_per_second": 60.53, "step": 1576000 }, { "epoch": 33.35, "learning_rate": 1.3940000000000002e-07, "loss": 3.7192, "step": 1584000 }, { "epoch": 33.35, "eval_loss": 3.543769598007202, "eval_runtime": 41.2526, "eval_samples_per_second": 969.637, "eval_steps_per_second": 60.602, "step": 1584000 }, { "epoch": 33.52, "eval_loss": 3.5554468631744385, "eval_runtime": 41.4086, "eval_samples_per_second": 965.983, "eval_steps_per_second": 60.374, "step": 1592000 }, { "epoch": 33.68, "learning_rate": 1.3666666666666665e-07, "loss": 3.7266, "step": 1600000 }, { "epoch": 33.68, "eval_loss": 3.574753999710083, "eval_runtime": 41.1899, "eval_samples_per_second": 971.113, "eval_steps_per_second": 60.695, "step": 1600000 }, { "epoch": 33.85, "eval_loss": 3.5620429515838623, "eval_runtime": 41.192, "eval_samples_per_second": 971.061, "eval_steps_per_second": 60.691, "step": 1608000 }, { "epoch": 34.02, "learning_rate": 1.3393333333333333e-07, "loss": 3.73, "step": 1616000 }, { "epoch": 34.02, "eval_loss": 3.5463855266571045, "eval_runtime": 41.1647, "eval_samples_per_second": 971.707, "eval_steps_per_second": 60.732, "step": 1616000 }, { "epoch": 34.19, "eval_loss": 3.56703782081604, "eval_runtime": 41.345, "eval_samples_per_second": 967.469, "eval_steps_per_second": 60.467, "step": 1624000 }, { "epoch": 34.36, "learning_rate": 1.312e-07, "loss": 3.7264, "step": 1632000 }, { "epoch": 34.36, "eval_loss": 3.562556743621826, "eval_runtime": 41.1519, "eval_samples_per_second": 972.009, "eval_steps_per_second": 60.751, "step": 1632000 }, { "epoch": 34.53, "eval_loss": 3.5640175342559814, "eval_runtime": 41.225, "eval_samples_per_second": 970.286, "eval_steps_per_second": 60.643, "step": 1640000 }, { "epoch": 34.69, "learning_rate": 1.2846666666666667e-07, "loss": 3.7317, "step": 1648000 }, { "epoch": 34.69, "eval_loss": 3.565020799636841, "eval_runtime": 41.2445, "eval_samples_per_second": 969.827, "eval_steps_per_second": 60.614, "step": 1648000 }, { "epoch": 34.86, "eval_loss": 3.545823335647583, "eval_runtime": 41.181, "eval_samples_per_second": 971.323, "eval_steps_per_second": 60.708, "step": 1656000 }, { "epoch": 35.03, "learning_rate": 1.2573333333333332e-07, "loss": 3.7332, "step": 1664000 }, { "epoch": 35.03, "eval_loss": 3.556736707687378, "eval_runtime": 41.2053, "eval_samples_per_second": 970.749, "eval_steps_per_second": 60.672, "step": 1664000 }, { "epoch": 35.2, "eval_loss": 3.561016321182251, "eval_runtime": 41.1105, "eval_samples_per_second": 972.987, "eval_steps_per_second": 60.812, "step": 1672000 }, { "epoch": 35.37, "learning_rate": 1.23e-07, "loss": 3.7248, "step": 1680000 }, { "epoch": 35.37, "eval_loss": 3.565033197402954, "eval_runtime": 41.1314, "eval_samples_per_second": 972.494, "eval_steps_per_second": 60.781, "step": 1680000 }, { "epoch": 35.54, "eval_loss": 3.5579514503479004, "eval_runtime": 41.2359, "eval_samples_per_second": 970.029, "eval_steps_per_second": 60.627, "step": 1688000 }, { "epoch": 35.7, "learning_rate": 1.2026666666666666e-07, "loss": 3.7232, "step": 1696000 }, { "epoch": 35.7, "eval_loss": 3.5829317569732666, "eval_runtime": 41.5316, "eval_samples_per_second": 963.123, "eval_steps_per_second": 60.195, "step": 1696000 }, { "epoch": 35.87, "eval_loss": 3.553234338760376, "eval_runtime": 41.2091, "eval_samples_per_second": 970.659, "eval_steps_per_second": 60.666, "step": 1704000 }, { "epoch": 36.04, "learning_rate": 1.1753333333333334e-07, "loss": 3.729, "step": 1712000 }, { "epoch": 36.04, "eval_loss": 3.5723159313201904, "eval_runtime": 41.2473, "eval_samples_per_second": 969.76, "eval_steps_per_second": 60.61, "step": 1712000 }, { "epoch": 36.21, "eval_loss": 3.5453789234161377, "eval_runtime": 41.2829, "eval_samples_per_second": 968.923, "eval_steps_per_second": 60.558, "step": 1720000 }, { "epoch": 36.38, "learning_rate": 1.1480000000000001e-07, "loss": 3.7273, "step": 1728000 }, { "epoch": 36.38, "eval_loss": 3.5622527599334717, "eval_runtime": 41.2679, "eval_samples_per_second": 969.276, "eval_steps_per_second": 60.58, "step": 1728000 }, { "epoch": 36.55, "eval_loss": 3.5461573600769043, "eval_runtime": 41.2796, "eval_samples_per_second": 969.001, "eval_steps_per_second": 60.563, "step": 1736000 }, { "epoch": 36.72, "learning_rate": 1.1206666666666666e-07, "loss": 3.7261, "step": 1744000 }, { "epoch": 36.72, "eval_loss": 3.574284315109253, "eval_runtime": 41.1482, "eval_samples_per_second": 972.096, "eval_steps_per_second": 60.756, "step": 1744000 }, { "epoch": 36.88, "eval_loss": 3.5637691020965576, "eval_runtime": 41.2008, "eval_samples_per_second": 970.856, "eval_steps_per_second": 60.678, "step": 1752000 }, { "epoch": 37.05, "learning_rate": 1.0933333333333333e-07, "loss": 3.7208, "step": 1760000 }, { "epoch": 37.05, "eval_loss": 3.55189847946167, "eval_runtime": 41.2562, "eval_samples_per_second": 969.552, "eval_steps_per_second": 60.597, "step": 1760000 }, { "epoch": 37.22, "eval_loss": 3.558392286300659, "eval_runtime": 41.4546, "eval_samples_per_second": 964.911, "eval_steps_per_second": 60.307, "step": 1768000 }, { "epoch": 37.39, "learning_rate": 1.066e-07, "loss": 3.7183, "step": 1776000 }, { "epoch": 37.39, "eval_loss": 3.5308432579040527, "eval_runtime": 41.3019, "eval_samples_per_second": 968.479, "eval_steps_per_second": 60.53, "step": 1776000 }, { "epoch": 37.56, "eval_loss": 3.554945230484009, "eval_runtime": 41.2828, "eval_samples_per_second": 968.925, "eval_steps_per_second": 60.558, "step": 1784000 }, { "epoch": 37.73, "learning_rate": 1.0386666666666667e-07, "loss": 3.7193, "step": 1792000 }, { "epoch": 37.73, "eval_loss": 3.540940999984741, "eval_runtime": 41.3223, "eval_samples_per_second": 968.0, "eval_steps_per_second": 60.5, "step": 1792000 }, { "epoch": 37.89, "eval_loss": 3.5396194458007812, "eval_runtime": 41.4779, "eval_samples_per_second": 964.368, "eval_steps_per_second": 60.273, "step": 1800000 }, { "epoch": 38.06, "learning_rate": 1.0113333333333334e-07, "loss": 3.7271, "step": 1808000 }, { "epoch": 38.06, "eval_loss": 3.553603172302246, "eval_runtime": 41.2755, "eval_samples_per_second": 969.099, "eval_steps_per_second": 60.569, "step": 1808000 }, { "epoch": 38.23, "eval_loss": 3.5452282428741455, "eval_runtime": 41.2774, "eval_samples_per_second": 969.053, "eval_steps_per_second": 60.566, "step": 1816000 }, { "epoch": 38.4, "learning_rate": 9.84e-08, "loss": 3.7284, "step": 1824000 }, { "epoch": 38.4, "eval_loss": 3.558215618133545, "eval_runtime": 41.3703, "eval_samples_per_second": 966.878, "eval_steps_per_second": 60.43, "step": 1824000 }, { "epoch": 38.57, "eval_loss": 3.5667941570281982, "eval_runtime": 41.3196, "eval_samples_per_second": 968.064, "eval_steps_per_second": 60.504, "step": 1832000 }, { "epoch": 38.74, "learning_rate": 9.566666666666666e-08, "loss": 3.714, "step": 1840000 }, { "epoch": 38.74, "eval_loss": 3.567256212234497, "eval_runtime": 41.4289, "eval_samples_per_second": 965.51, "eval_steps_per_second": 60.344, "step": 1840000 }, { "epoch": 38.9, "eval_loss": 3.5477054119110107, "eval_runtime": 41.2882, "eval_samples_per_second": 968.799, "eval_steps_per_second": 60.55, "step": 1848000 }, { "epoch": 39.07, "learning_rate": 9.293333333333333e-08, "loss": 3.7105, "step": 1856000 }, { "epoch": 39.07, "eval_loss": 3.5661816596984863, "eval_runtime": 41.3711, "eval_samples_per_second": 966.858, "eval_steps_per_second": 60.429, "step": 1856000 }, { "epoch": 39.24, "eval_loss": 3.5498225688934326, "eval_runtime": 41.2668, "eval_samples_per_second": 969.303, "eval_steps_per_second": 60.581, "step": 1864000 }, { "epoch": 39.41, "learning_rate": 9.02e-08, "loss": 3.7189, "step": 1872000 }, { "epoch": 39.41, "eval_loss": 3.549349546432495, "eval_runtime": 41.3612, "eval_samples_per_second": 967.091, "eval_steps_per_second": 60.443, "step": 1872000 }, { "epoch": 39.58, "eval_loss": 3.5676159858703613, "eval_runtime": 41.3658, "eval_samples_per_second": 966.982, "eval_steps_per_second": 60.436, "step": 1880000 }, { "epoch": 39.75, "learning_rate": 8.746666666666667e-08, "loss": 3.7203, "step": 1888000 }, { "epoch": 39.75, "eval_loss": 3.5640437602996826, "eval_runtime": 41.3191, "eval_samples_per_second": 968.076, "eval_steps_per_second": 60.505, "step": 1888000 }, { "epoch": 39.91, "eval_loss": 3.574657440185547, "eval_runtime": 41.2758, "eval_samples_per_second": 969.09, "eval_steps_per_second": 60.568, "step": 1896000 }, { "epoch": 40.08, "learning_rate": 8.473333333333334e-08, "loss": 3.7271, "step": 1904000 }, { "epoch": 40.08, "eval_loss": 3.559176445007324, "eval_runtime": 41.2682, "eval_samples_per_second": 969.268, "eval_steps_per_second": 60.579, "step": 1904000 }, { "epoch": 40.25, "eval_loss": 3.5515100955963135, "eval_runtime": 41.3407, "eval_samples_per_second": 967.57, "eval_steps_per_second": 60.473, "step": 1912000 }, { "epoch": 40.42, "learning_rate": 8.2e-08, "loss": 3.7237, "step": 1920000 }, { "epoch": 40.42, "eval_loss": 3.5703725814819336, "eval_runtime": 41.32, "eval_samples_per_second": 968.053, "eval_steps_per_second": 60.503, "step": 1920000 }, { "epoch": 40.59, "eval_loss": 3.56421160697937, "eval_runtime": 41.2747, "eval_samples_per_second": 969.118, "eval_steps_per_second": 60.57, "step": 1928000 }, { "epoch": 40.76, "learning_rate": 7.926666666666666e-08, "loss": 3.723, "step": 1936000 }, { "epoch": 40.76, "eval_loss": 3.529963970184326, "eval_runtime": 41.312, "eval_samples_per_second": 968.241, "eval_steps_per_second": 60.515, "step": 1936000 }, { "epoch": 40.93, "eval_loss": 3.548246383666992, "eval_runtime": 41.3925, "eval_samples_per_second": 966.359, "eval_steps_per_second": 60.397, "step": 1944000 }, { "epoch": 41.09, "learning_rate": 7.653333333333333e-08, "loss": 3.7224, "step": 1952000 }, { "epoch": 41.09, "eval_loss": 3.558602809906006, "eval_runtime": 41.3607, "eval_samples_per_second": 967.102, "eval_steps_per_second": 60.444, "step": 1952000 }, { "epoch": 41.26, "eval_loss": 3.5462992191314697, "eval_runtime": 41.3586, "eval_samples_per_second": 967.15, "eval_steps_per_second": 60.447, "step": 1960000 }, { "epoch": 41.43, "learning_rate": 7.38e-08, "loss": 3.715, "step": 1968000 }, { "epoch": 41.43, "eval_loss": 3.5323476791381836, "eval_runtime": 41.2806, "eval_samples_per_second": 968.977, "eval_steps_per_second": 60.561, "step": 1968000 }, { "epoch": 41.6, "eval_loss": 3.5426485538482666, "eval_runtime": 41.7376, "eval_samples_per_second": 958.367, "eval_steps_per_second": 59.898, "step": 1976000 }, { "epoch": 41.77, "learning_rate": 7.106666666666667e-08, "loss": 3.7209, "step": 1984000 }, { "epoch": 41.77, "eval_loss": 3.551342487335205, "eval_runtime": 41.7643, "eval_samples_per_second": 957.756, "eval_steps_per_second": 59.86, "step": 1984000 }, { "epoch": 41.94, "eval_loss": 3.561406135559082, "eval_runtime": 41.4726, "eval_samples_per_second": 964.492, "eval_steps_per_second": 60.281, "step": 1992000 }, { "epoch": 42.1, "learning_rate": 6.833333333333332e-08, "loss": 3.7183, "step": 2000000 }, { "epoch": 42.1, "eval_loss": 3.567796468734741, "eval_runtime": 41.269, "eval_samples_per_second": 969.25, "eval_steps_per_second": 60.578, "step": 2000000 }, { "epoch": 42.27, "eval_loss": 3.5304062366485596, "eval_runtime": 41.3875, "eval_samples_per_second": 966.474, "eval_steps_per_second": 60.405, "step": 2008000 }, { "epoch": 42.44, "learning_rate": 6.56e-08, "loss": 3.7161, "step": 2016000 }, { "epoch": 42.44, "eval_loss": 3.5631351470947266, "eval_runtime": 41.3512, "eval_samples_per_second": 967.324, "eval_steps_per_second": 60.458, "step": 2016000 }, { "epoch": 42.61, "eval_loss": 3.5589487552642822, "eval_runtime": 41.3841, "eval_samples_per_second": 966.556, "eval_steps_per_second": 60.41, "step": 2024000 }, { "epoch": 42.78, "learning_rate": 6.286666666666666e-08, "loss": 3.7215, "step": 2032000 }, { "epoch": 42.78, "eval_loss": 3.5639231204986572, "eval_runtime": 41.4018, "eval_samples_per_second": 966.143, "eval_steps_per_second": 60.384, "step": 2032000 }, { "epoch": 42.95, "eval_loss": 3.5375659465789795, "eval_runtime": 41.3, "eval_samples_per_second": 968.522, "eval_steps_per_second": 60.533, "step": 2040000 }, { "epoch": 43.11, "learning_rate": 6.013333333333333e-08, "loss": 3.7205, "step": 2048000 }, { "epoch": 43.11, "eval_loss": 3.5478363037109375, "eval_runtime": 41.2814, "eval_samples_per_second": 968.959, "eval_steps_per_second": 60.56, "step": 2048000 }, { "epoch": 43.28, "eval_loss": 3.551063299179077, "eval_runtime": 41.3483, "eval_samples_per_second": 967.391, "eval_steps_per_second": 60.462, "step": 2056000 }, { "epoch": 43.45, "learning_rate": 5.7400000000000004e-08, "loss": 3.7178, "step": 2064000 }, { "epoch": 43.45, "eval_loss": 3.5284996032714844, "eval_runtime": 41.4216, "eval_samples_per_second": 965.679, "eval_steps_per_second": 60.355, "step": 2064000 }, { "epoch": 43.62, "eval_loss": 3.5428383350372314, "eval_runtime": 41.4407, "eval_samples_per_second": 965.235, "eval_steps_per_second": 60.327, "step": 2072000 }, { "epoch": 43.79, "learning_rate": 5.4666666666666666e-08, "loss": 3.7232, "step": 2080000 }, { "epoch": 43.79, "eval_loss": 3.5347280502319336, "eval_runtime": 41.6164, "eval_samples_per_second": 961.159, "eval_steps_per_second": 60.072, "step": 2080000 }, { "epoch": 43.96, "eval_loss": 3.5500776767730713, "eval_runtime": 41.2368, "eval_samples_per_second": 970.006, "eval_steps_per_second": 60.625, "step": 2088000 }, { "epoch": 44.13, "learning_rate": 5.1933333333333335e-08, "loss": 3.7167, "step": 2096000 }, { "epoch": 44.13, "eval_loss": 3.5421836376190186, "eval_runtime": 41.7632, "eval_samples_per_second": 957.781, "eval_steps_per_second": 59.861, "step": 2096000 }, { "epoch": 44.29, "eval_loss": 3.548715591430664, "eval_runtime": 42.4633, "eval_samples_per_second": 941.99, "eval_steps_per_second": 58.874, "step": 2104000 }, { "epoch": 44.46, "learning_rate": 4.92e-08, "loss": 3.7253, "step": 2112000 }, { "epoch": 44.46, "eval_loss": 3.5540173053741455, "eval_runtime": 41.2807, "eval_samples_per_second": 968.975, "eval_steps_per_second": 60.561, "step": 2112000 }, { "epoch": 44.63, "eval_loss": 3.543179988861084, "eval_runtime": 41.2704, "eval_samples_per_second": 969.217, "eval_steps_per_second": 60.576, "step": 2120000 }, { "epoch": 44.8, "learning_rate": 4.6466666666666666e-08, "loss": 3.7139, "step": 2128000 }, { "epoch": 44.8, "eval_loss": 3.550206184387207, "eval_runtime": 41.9899, "eval_samples_per_second": 952.61, "eval_steps_per_second": 59.538, "step": 2128000 }, { "epoch": 44.97, "eval_loss": 3.5449559688568115, "eval_runtime": 41.4028, "eval_samples_per_second": 966.118, "eval_steps_per_second": 60.382, "step": 2136000 }, { "epoch": 45.14, "learning_rate": 4.3733333333333335e-08, "loss": 3.7194, "step": 2144000 }, { "epoch": 45.14, "eval_loss": 3.5563695430755615, "eval_runtime": 41.3519, "eval_samples_per_second": 967.307, "eval_steps_per_second": 60.457, "step": 2144000 }, { "epoch": 45.3, "eval_loss": 3.544080972671509, "eval_runtime": 41.479, "eval_samples_per_second": 964.344, "eval_steps_per_second": 60.271, "step": 2152000 }, { "epoch": 45.47, "learning_rate": 4.1e-08, "loss": 3.7167, "step": 2160000 }, { "epoch": 45.47, "eval_loss": 3.5549235343933105, "eval_runtime": 41.7585, "eval_samples_per_second": 957.888, "eval_steps_per_second": 59.868, "step": 2160000 }, { "epoch": 45.64, "eval_loss": 3.5428645610809326, "eval_runtime": 41.4538, "eval_samples_per_second": 964.93, "eval_steps_per_second": 60.308, "step": 2168000 }, { "epoch": 45.81, "learning_rate": 3.8266666666666665e-08, "loss": 3.7202, "step": 2176000 }, { "epoch": 45.81, "eval_loss": 3.5612573623657227, "eval_runtime": 41.585, "eval_samples_per_second": 961.885, "eval_steps_per_second": 60.118, "step": 2176000 }, { "epoch": 45.98, "eval_loss": 3.546862840652466, "eval_runtime": 41.4173, "eval_samples_per_second": 965.779, "eval_steps_per_second": 60.361, "step": 2184000 }, { "epoch": 46.15, "learning_rate": 3.5533333333333334e-08, "loss": 3.7193, "step": 2192000 }, { "epoch": 46.15, "eval_loss": 3.5467140674591064, "eval_runtime": 41.5286, "eval_samples_per_second": 963.191, "eval_steps_per_second": 60.199, "step": 2192000 }, { "epoch": 46.31, "eval_loss": 3.5492827892303467, "eval_runtime": 41.8931, "eval_samples_per_second": 954.812, "eval_steps_per_second": 59.676, "step": 2200000 }, { "epoch": 46.48, "learning_rate": 3.28e-08, "loss": 3.717, "step": 2208000 }, { "epoch": 46.48, "eval_loss": 3.5651960372924805, "eval_runtime": 41.4355, "eval_samples_per_second": 965.356, "eval_steps_per_second": 60.335, "step": 2208000 }, { "epoch": 46.65, "eval_loss": 3.566892623901367, "eval_runtime": 41.4881, "eval_samples_per_second": 964.133, "eval_steps_per_second": 60.258, "step": 2216000 }, { "epoch": 46.82, "learning_rate": 3.0066666666666665e-08, "loss": 3.7164, "step": 2224000 }, { "epoch": 46.82, "eval_loss": 3.575472593307495, "eval_runtime": 41.4007, "eval_samples_per_second": 966.167, "eval_steps_per_second": 60.385, "step": 2224000 }, { "epoch": 46.99, "eval_loss": 3.5580363273620605, "eval_runtime": 41.4412, "eval_samples_per_second": 965.223, "eval_steps_per_second": 60.326, "step": 2232000 }, { "epoch": 47.16, "learning_rate": 2.7333333333333333e-08, "loss": 3.715, "step": 2240000 }, { "epoch": 47.16, "eval_loss": 3.5402655601501465, "eval_runtime": 41.4287, "eval_samples_per_second": 965.513, "eval_steps_per_second": 60.345, "step": 2240000 }, { "epoch": 47.33, "eval_loss": 3.5521085262298584, "eval_runtime": 41.929, "eval_samples_per_second": 953.993, "eval_steps_per_second": 59.625, "step": 2248000 }, { "epoch": 47.49, "learning_rate": 2.46e-08, "loss": 3.7091, "step": 2256000 }, { "epoch": 47.49, "eval_loss": 3.5604448318481445, "eval_runtime": 41.9128, "eval_samples_per_second": 954.362, "eval_steps_per_second": 59.648, "step": 2256000 }, { "epoch": 47.66, "eval_loss": 3.5401012897491455, "eval_runtime": 41.5487, "eval_samples_per_second": 962.726, "eval_steps_per_second": 60.17, "step": 2264000 }, { "epoch": 47.83, "learning_rate": 2.1866666666666667e-08, "loss": 3.7199, "step": 2272000 }, { "epoch": 47.83, "eval_loss": 3.5407750606536865, "eval_runtime": 41.6011, "eval_samples_per_second": 961.512, "eval_steps_per_second": 60.094, "step": 2272000 }, { "epoch": 48.0, "eval_loss": 3.5508713722229004, "eval_runtime": 41.6348, "eval_samples_per_second": 960.736, "eval_steps_per_second": 60.046, "step": 2280000 }, { "epoch": 48.17, "learning_rate": 1.9133333333333333e-08, "loss": 3.7238, "step": 2288000 }, { "epoch": 48.17, "eval_loss": 3.53483510017395, "eval_runtime": 41.5381, "eval_samples_per_second": 962.972, "eval_steps_per_second": 60.186, "step": 2288000 }, { "epoch": 48.34, "eval_loss": 3.5529632568359375, "eval_runtime": 41.6886, "eval_samples_per_second": 959.496, "eval_steps_per_second": 59.968, "step": 2296000 }, { "epoch": 48.5, "learning_rate": 1.64e-08, "loss": 3.7193, "step": 2304000 }, { "epoch": 48.5, "eval_loss": 3.544734001159668, "eval_runtime": 41.7228, "eval_samples_per_second": 958.709, "eval_steps_per_second": 59.919, "step": 2304000 }, { "epoch": 48.67, "eval_loss": 3.545278549194336, "eval_runtime": 41.4067, "eval_samples_per_second": 966.027, "eval_steps_per_second": 60.377, "step": 2312000 }, { "epoch": 48.84, "learning_rate": 1.3666666666666667e-08, "loss": 3.7195, "step": 2320000 }, { "epoch": 48.84, "eval_loss": 3.5487241744995117, "eval_runtime": 42.1788, "eval_samples_per_second": 948.345, "eval_steps_per_second": 59.272, "step": 2320000 }, { "epoch": 49.01, "eval_loss": 3.5356762409210205, "eval_runtime": 41.475, "eval_samples_per_second": 964.436, "eval_steps_per_second": 60.277, "step": 2328000 }, { "epoch": 49.18, "learning_rate": 1.0933333333333334e-08, "loss": 3.7187, "step": 2336000 }, { "epoch": 49.18, "eval_loss": 3.540393352508545, "eval_runtime": 41.5311, "eval_samples_per_second": 963.133, "eval_steps_per_second": 60.196, "step": 2336000 }, { "epoch": 49.35, "eval_loss": 3.524733304977417, "eval_runtime": 41.455, "eval_samples_per_second": 964.901, "eval_steps_per_second": 60.306, "step": 2344000 }, { "epoch": 49.51, "learning_rate": 8.2e-09, "loss": 3.7157, "step": 2352000 }, { "epoch": 49.51, "eval_loss": 3.5556745529174805, "eval_runtime": 41.5677, "eval_samples_per_second": 962.286, "eval_steps_per_second": 60.143, "step": 2352000 }, { "epoch": 49.68, "eval_loss": 3.553208112716675, "eval_runtime": 41.4048, "eval_samples_per_second": 966.072, "eval_steps_per_second": 60.38, "step": 2360000 }, { "epoch": 49.85, "learning_rate": 5.466666666666667e-09, "loss": 3.7144, "step": 2368000 }, { "epoch": 49.85, "eval_loss": 3.5453133583068848, "eval_runtime": 41.6628, "eval_samples_per_second": 960.089, "eval_steps_per_second": 60.006, "step": 2368000 }, { "epoch": 50.02, "eval_loss": 3.5421085357666016, "eval_runtime": 41.4612, "eval_samples_per_second": 964.757, "eval_steps_per_second": 60.297, "step": 2376000 }, { "epoch": 50.19, "learning_rate": 2.7333333333333334e-09, "loss": 3.715, "step": 2384000 }, { "epoch": 50.19, "eval_loss": 3.518317461013794, "eval_runtime": 41.4914, "eval_samples_per_second": 964.054, "eval_steps_per_second": 60.253, "step": 2384000 }, { "epoch": 50.36, "eval_loss": 3.5473098754882812, "eval_runtime": 41.4726, "eval_samples_per_second": 964.491, "eval_steps_per_second": 60.281, "step": 2392000 }, { "epoch": 50.53, "learning_rate": 0.0, "loss": 3.7208, "step": 2400000 }, { "epoch": 50.53, "eval_loss": 3.5385937690734863, "eval_runtime": 41.4411, "eval_samples_per_second": 965.224, "eval_steps_per_second": 60.327, "step": 2400000 }, { "epoch": 50.53, "step": 2400000, "total_flos": 7.752989891649069e+17, "train_loss": 3.7635687060546874, "train_runtime": 151726.3184, "train_samples_per_second": 253.087, "train_steps_per_second": 15.818 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 51, "save_steps": 32000, "total_flos": 7.752989891649069e+17, "trial_name": null, "trial_params": null }