|
{ |
|
"best_metric": 1.9065207242965698, |
|
"best_model_checkpoint": "./model_tweets_2020_Q3_full/checkpoint-2176000", |
|
"epoch": 10.105220609597433, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.219714641571045, |
|
"eval_runtime": 441.2719, |
|
"eval_samples_per_second": 906.47, |
|
"eval_steps_per_second": 56.654, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.3934, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.142895460128784, |
|
"eval_runtime": 441.97, |
|
"eval_samples_per_second": 905.039, |
|
"eval_steps_per_second": 56.565, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.1001179218292236, |
|
"eval_runtime": 441.5938, |
|
"eval_samples_per_second": 905.81, |
|
"eval_steps_per_second": 56.613, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.2294, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.076151132583618, |
|
"eval_runtime": 441.8927, |
|
"eval_samples_per_second": 905.197, |
|
"eval_steps_per_second": 56.575, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.05153489112854, |
|
"eval_runtime": 442.4391, |
|
"eval_samples_per_second": 904.079, |
|
"eval_steps_per_second": 56.505, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.1835, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.043534755706787, |
|
"eval_runtime": 441.9647, |
|
"eval_samples_per_second": 905.05, |
|
"eval_steps_per_second": 56.566, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0345780849456787, |
|
"eval_runtime": 442.2484, |
|
"eval_samples_per_second": 904.469, |
|
"eval_steps_per_second": 56.529, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.1517, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.0254366397857666, |
|
"eval_runtime": 441.5788, |
|
"eval_samples_per_second": 905.841, |
|
"eval_steps_per_second": 56.615, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.017543315887451, |
|
"eval_runtime": 442.2237, |
|
"eval_samples_per_second": 904.52, |
|
"eval_steps_per_second": 56.532, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.1381, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.007652521133423, |
|
"eval_runtime": 442.7678, |
|
"eval_samples_per_second": 903.408, |
|
"eval_steps_per_second": 56.463, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.0028882026672363, |
|
"eval_runtime": 443.0453, |
|
"eval_samples_per_second": 902.842, |
|
"eval_steps_per_second": 56.428, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.1244, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.001089572906494, |
|
"eval_runtime": 442.6781, |
|
"eval_samples_per_second": 903.591, |
|
"eval_steps_per_second": 56.474, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.9979643821716309, |
|
"eval_runtime": 443.0079, |
|
"eval_samples_per_second": 902.919, |
|
"eval_steps_per_second": 56.432, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.1116, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.9901278018951416, |
|
"eval_runtime": 443.7009, |
|
"eval_samples_per_second": 901.508, |
|
"eval_steps_per_second": 56.344, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.9839751720428467, |
|
"eval_runtime": 442.1533, |
|
"eval_samples_per_second": 904.664, |
|
"eval_steps_per_second": 56.541, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.1104, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.9885350465774536, |
|
"eval_runtime": 442.9416, |
|
"eval_samples_per_second": 903.054, |
|
"eval_steps_per_second": 56.441, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.9854729175567627, |
|
"eval_runtime": 442.2549, |
|
"eval_samples_per_second": 904.456, |
|
"eval_steps_per_second": 56.528, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.1031, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.9829412698745728, |
|
"eval_runtime": 442.7275, |
|
"eval_samples_per_second": 903.49, |
|
"eval_steps_per_second": 56.468, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.9812543392181396, |
|
"eval_runtime": 441.8999, |
|
"eval_samples_per_second": 905.182, |
|
"eval_steps_per_second": 56.574, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.0971, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.981191873550415, |
|
"eval_runtime": 443.7308, |
|
"eval_samples_per_second": 901.447, |
|
"eval_steps_per_second": 56.34, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.979522943496704, |
|
"eval_runtime": 445.7901, |
|
"eval_samples_per_second": 897.283, |
|
"eval_steps_per_second": 56.08, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.1044, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.9738078117370605, |
|
"eval_runtime": 448.3685, |
|
"eval_samples_per_second": 892.123, |
|
"eval_steps_per_second": 55.758, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.9768003225326538, |
|
"eval_runtime": 447.4392, |
|
"eval_samples_per_second": 893.976, |
|
"eval_steps_per_second": 55.874, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.0928, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.978550672531128, |
|
"eval_runtime": 444.0634, |
|
"eval_samples_per_second": 900.772, |
|
"eval_steps_per_second": 56.298, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.9699029922485352, |
|
"eval_runtime": 448.9877, |
|
"eval_samples_per_second": 890.893, |
|
"eval_steps_per_second": 55.681, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.0949, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.9700462818145752, |
|
"eval_runtime": 447.582, |
|
"eval_samples_per_second": 893.691, |
|
"eval_steps_per_second": 55.856, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.965348720550537, |
|
"eval_runtime": 452.353, |
|
"eval_samples_per_second": 884.265, |
|
"eval_steps_per_second": 55.267, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.0892, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.9680548906326294, |
|
"eval_runtime": 448.4016, |
|
"eval_samples_per_second": 892.057, |
|
"eval_steps_per_second": 55.754, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.9649720191955566, |
|
"eval_runtime": 445.7154, |
|
"eval_samples_per_second": 897.434, |
|
"eval_steps_per_second": 56.09, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.0841, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 1.9638316631317139, |
|
"eval_runtime": 447.9015, |
|
"eval_samples_per_second": 893.054, |
|
"eval_steps_per_second": 55.816, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 1.9682003259658813, |
|
"eval_runtime": 448.197, |
|
"eval_samples_per_second": 892.465, |
|
"eval_steps_per_second": 55.779, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.0887, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.9604706764221191, |
|
"eval_runtime": 446.1801, |
|
"eval_samples_per_second": 896.499, |
|
"eval_steps_per_second": 56.031, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 1.961388111114502, |
|
"eval_runtime": 449.8519, |
|
"eval_samples_per_second": 889.182, |
|
"eval_steps_per_second": 55.574, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.0842, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 1.962444543838501, |
|
"eval_runtime": 447.5177, |
|
"eval_samples_per_second": 893.819, |
|
"eval_steps_per_second": 55.864, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.9605332612991333, |
|
"eval_runtime": 449.5723, |
|
"eval_samples_per_second": 889.734, |
|
"eval_steps_per_second": 55.608, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.0773, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 1.9554321765899658, |
|
"eval_runtime": 444.9546, |
|
"eval_samples_per_second": 898.968, |
|
"eval_steps_per_second": 56.186, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 1.957843542098999, |
|
"eval_runtime": 445.0742, |
|
"eval_samples_per_second": 898.727, |
|
"eval_steps_per_second": 56.17, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.0795, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.957188606262207, |
|
"eval_runtime": 445.8149, |
|
"eval_samples_per_second": 897.233, |
|
"eval_steps_per_second": 56.077, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.9521090984344482, |
|
"eval_runtime": 444.7945, |
|
"eval_samples_per_second": 899.292, |
|
"eval_steps_per_second": 56.206, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.0794, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 1.9550837278366089, |
|
"eval_runtime": 446.4097, |
|
"eval_samples_per_second": 896.038, |
|
"eval_steps_per_second": 56.002, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.9569264650344849, |
|
"eval_runtime": 443.7576, |
|
"eval_samples_per_second": 901.393, |
|
"eval_steps_per_second": 56.337, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.0788, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 1.9571126699447632, |
|
"eval_runtime": 445.1557, |
|
"eval_samples_per_second": 898.562, |
|
"eval_steps_per_second": 56.16, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 1.9501842260360718, |
|
"eval_runtime": 445.7741, |
|
"eval_samples_per_second": 897.315, |
|
"eval_steps_per_second": 56.082, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.0778, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.9543704986572266, |
|
"eval_runtime": 445.8348, |
|
"eval_samples_per_second": 897.193, |
|
"eval_steps_per_second": 56.075, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 1.947017788887024, |
|
"eval_runtime": 447.0241, |
|
"eval_samples_per_second": 894.806, |
|
"eval_steps_per_second": 55.925, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.0694, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 1.9545376300811768, |
|
"eval_runtime": 446.4613, |
|
"eval_samples_per_second": 895.934, |
|
"eval_steps_per_second": 55.996, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.9471752643585205, |
|
"eval_runtime": 446.0717, |
|
"eval_samples_per_second": 896.717, |
|
"eval_steps_per_second": 56.045, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.0718, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 1.947655200958252, |
|
"eval_runtime": 445.8656, |
|
"eval_samples_per_second": 897.131, |
|
"eval_steps_per_second": 56.071, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 1.949648380279541, |
|
"eval_runtime": 446.4025, |
|
"eval_samples_per_second": 896.052, |
|
"eval_steps_per_second": 56.003, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.0787, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.943971037864685, |
|
"eval_runtime": 448.3905, |
|
"eval_samples_per_second": 892.08, |
|
"eval_steps_per_second": 55.755, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 1.9483779668807983, |
|
"eval_runtime": 448.4782, |
|
"eval_samples_per_second": 891.905, |
|
"eval_steps_per_second": 55.744, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.0764, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.9475030899047852, |
|
"eval_runtime": 447.8377, |
|
"eval_samples_per_second": 893.181, |
|
"eval_steps_per_second": 55.824, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 1.946897029876709, |
|
"eval_runtime": 446.5774, |
|
"eval_samples_per_second": 895.701, |
|
"eval_steps_per_second": 55.981, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.0795, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.947367787361145, |
|
"eval_runtime": 446.4599, |
|
"eval_samples_per_second": 895.937, |
|
"eval_steps_per_second": 55.996, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 1.9491695165634155, |
|
"eval_runtime": 449.1184, |
|
"eval_samples_per_second": 890.634, |
|
"eval_steps_per_second": 55.665, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.07, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.9479598999023438, |
|
"eval_runtime": 447.8049, |
|
"eval_samples_per_second": 893.246, |
|
"eval_steps_per_second": 55.828, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.9481781721115112, |
|
"eval_runtime": 445.8382, |
|
"eval_samples_per_second": 897.186, |
|
"eval_steps_per_second": 56.074, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.0712, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 1.9497871398925781, |
|
"eval_runtime": 446.4596, |
|
"eval_samples_per_second": 895.938, |
|
"eval_steps_per_second": 55.996, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.9429104328155518, |
|
"eval_runtime": 446.91, |
|
"eval_samples_per_second": 895.035, |
|
"eval_steps_per_second": 55.94, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.0739, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 1.945642352104187, |
|
"eval_runtime": 446.7665, |
|
"eval_samples_per_second": 895.322, |
|
"eval_steps_per_second": 55.958, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 1.9468969106674194, |
|
"eval_runtime": 446.5973, |
|
"eval_samples_per_second": 895.661, |
|
"eval_steps_per_second": 55.979, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.0688, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 1.9466804265975952, |
|
"eval_runtime": 445.9118, |
|
"eval_samples_per_second": 897.038, |
|
"eval_steps_per_second": 56.065, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 1.945359230041504, |
|
"eval_runtime": 446.3398, |
|
"eval_samples_per_second": 896.178, |
|
"eval_steps_per_second": 56.011, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.0706, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.9440275430679321, |
|
"eval_runtime": 445.8082, |
|
"eval_samples_per_second": 897.247, |
|
"eval_steps_per_second": 56.078, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.9401416778564453, |
|
"eval_runtime": 446.0226, |
|
"eval_samples_per_second": 896.816, |
|
"eval_steps_per_second": 56.051, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.0694, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 1.9396692514419556, |
|
"eval_runtime": 447.4896, |
|
"eval_samples_per_second": 893.876, |
|
"eval_steps_per_second": 55.867, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.9429028034210205, |
|
"eval_runtime": 445.7305, |
|
"eval_samples_per_second": 897.403, |
|
"eval_steps_per_second": 56.088, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.0698, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 1.9483931064605713, |
|
"eval_runtime": 445.796, |
|
"eval_samples_per_second": 897.271, |
|
"eval_steps_per_second": 56.079, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 1.9375388622283936, |
|
"eval_runtime": 445.5629, |
|
"eval_samples_per_second": 897.741, |
|
"eval_steps_per_second": 56.109, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.0681, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 1.9411094188690186, |
|
"eval_runtime": 446.974, |
|
"eval_samples_per_second": 894.907, |
|
"eval_steps_per_second": 55.932, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.9418509006500244, |
|
"eval_runtime": 445.5586, |
|
"eval_samples_per_second": 897.75, |
|
"eval_steps_per_second": 56.109, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.0676, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 1.9373135566711426, |
|
"eval_runtime": 446.9581, |
|
"eval_samples_per_second": 894.939, |
|
"eval_steps_per_second": 55.934, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.936606526374817, |
|
"eval_runtime": 448.0456, |
|
"eval_samples_per_second": 892.766, |
|
"eval_steps_per_second": 55.798, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.0641, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 1.9421836137771606, |
|
"eval_runtime": 446.2468, |
|
"eval_samples_per_second": 896.365, |
|
"eval_steps_per_second": 56.023, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.9364625215530396, |
|
"eval_runtime": 446.5279, |
|
"eval_samples_per_second": 895.801, |
|
"eval_steps_per_second": 55.988, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.0692, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.941666603088379, |
|
"eval_runtime": 446.7472, |
|
"eval_samples_per_second": 895.361, |
|
"eval_steps_per_second": 55.96, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 1.938507318496704, |
|
"eval_runtime": 449.4705, |
|
"eval_samples_per_second": 889.936, |
|
"eval_steps_per_second": 55.621, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.0676, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 1.9362366199493408, |
|
"eval_runtime": 449.8, |
|
"eval_samples_per_second": 889.284, |
|
"eval_steps_per_second": 55.58, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 1.9413690567016602, |
|
"eval_runtime": 447.228, |
|
"eval_samples_per_second": 894.398, |
|
"eval_steps_per_second": 55.9, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.0657, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 1.9437037706375122, |
|
"eval_runtime": 446.7561, |
|
"eval_samples_per_second": 895.343, |
|
"eval_steps_per_second": 55.959, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 1.935581922531128, |
|
"eval_runtime": 448.4477, |
|
"eval_samples_per_second": 891.966, |
|
"eval_steps_per_second": 55.748, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.0638, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 1.9353160858154297, |
|
"eval_runtime": 447.7021, |
|
"eval_samples_per_second": 893.451, |
|
"eval_steps_per_second": 55.841, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.9384965896606445, |
|
"eval_runtime": 446.8195, |
|
"eval_samples_per_second": 895.216, |
|
"eval_steps_per_second": 55.951, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.0673, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 1.935878872871399, |
|
"eval_runtime": 447.5698, |
|
"eval_samples_per_second": 893.715, |
|
"eval_steps_per_second": 55.857, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 1.9314086437225342, |
|
"eval_runtime": 447.0842, |
|
"eval_samples_per_second": 894.686, |
|
"eval_steps_per_second": 55.918, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.0634, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 1.9294040203094482, |
|
"eval_runtime": 447.3121, |
|
"eval_samples_per_second": 894.23, |
|
"eval_steps_per_second": 55.889, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 1.9346296787261963, |
|
"eval_runtime": 447.8515, |
|
"eval_samples_per_second": 893.153, |
|
"eval_steps_per_second": 55.822, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.0643, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 1.933471918106079, |
|
"eval_runtime": 446.8821, |
|
"eval_samples_per_second": 895.091, |
|
"eval_steps_per_second": 55.943, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.9316155910491943, |
|
"eval_runtime": 447.2102, |
|
"eval_samples_per_second": 894.434, |
|
"eval_steps_per_second": 55.902, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.0596, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 1.9356091022491455, |
|
"eval_runtime": 450.1829, |
|
"eval_samples_per_second": 888.528, |
|
"eval_steps_per_second": 55.533, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 1.9390416145324707, |
|
"eval_runtime": 447.8923, |
|
"eval_samples_per_second": 893.072, |
|
"eval_steps_per_second": 55.817, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.0637, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 1.9396628141403198, |
|
"eval_runtime": 447.225, |
|
"eval_samples_per_second": 894.404, |
|
"eval_steps_per_second": 55.9, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 1.9374940395355225, |
|
"eval_runtime": 450.2246, |
|
"eval_samples_per_second": 888.445, |
|
"eval_steps_per_second": 55.528, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.0637, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 1.9351787567138672, |
|
"eval_runtime": 449.793, |
|
"eval_samples_per_second": 889.298, |
|
"eval_steps_per_second": 55.581, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 1.9309793710708618, |
|
"eval_runtime": 451.6574, |
|
"eval_samples_per_second": 885.627, |
|
"eval_steps_per_second": 55.352, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.0681, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 1.9316256046295166, |
|
"eval_runtime": 450.9965, |
|
"eval_samples_per_second": 886.925, |
|
"eval_steps_per_second": 55.433, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 1.9269251823425293, |
|
"eval_runtime": 451.9701, |
|
"eval_samples_per_second": 885.014, |
|
"eval_steps_per_second": 55.313, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.0663, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 1.930124044418335, |
|
"eval_runtime": 450.7438, |
|
"eval_samples_per_second": 887.422, |
|
"eval_steps_per_second": 55.464, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.9354177713394165, |
|
"eval_runtime": 452.0689, |
|
"eval_samples_per_second": 884.821, |
|
"eval_steps_per_second": 55.301, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.0653, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 1.9372798204421997, |
|
"eval_runtime": 449.7405, |
|
"eval_samples_per_second": 889.402, |
|
"eval_steps_per_second": 55.588, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 1.935416579246521, |
|
"eval_runtime": 448.8481, |
|
"eval_samples_per_second": 891.17, |
|
"eval_steps_per_second": 55.698, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.0606, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 1.9285781383514404, |
|
"eval_runtime": 449.1658, |
|
"eval_samples_per_second": 890.54, |
|
"eval_steps_per_second": 55.659, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 1.9317643642425537, |
|
"eval_runtime": 454.8294, |
|
"eval_samples_per_second": 879.451, |
|
"eval_steps_per_second": 54.966, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.0601, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 1.92868971824646, |
|
"eval_runtime": 451.6998, |
|
"eval_samples_per_second": 885.544, |
|
"eval_steps_per_second": 55.346, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 1.9279505014419556, |
|
"eval_runtime": 453.4329, |
|
"eval_samples_per_second": 882.159, |
|
"eval_steps_per_second": 55.135, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.0555, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 1.9279268980026245, |
|
"eval_runtime": 452.6535, |
|
"eval_samples_per_second": 883.678, |
|
"eval_steps_per_second": 55.23, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 1.9296597242355347, |
|
"eval_runtime": 453.8201, |
|
"eval_samples_per_second": 881.407, |
|
"eval_steps_per_second": 55.088, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.0561, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 1.929028868675232, |
|
"eval_runtime": 449.3895, |
|
"eval_samples_per_second": 890.096, |
|
"eval_steps_per_second": 55.631, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 1.9251536130905151, |
|
"eval_runtime": 449.4314, |
|
"eval_samples_per_second": 890.014, |
|
"eval_steps_per_second": 55.626, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.066, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 1.9274221658706665, |
|
"eval_runtime": 449.6898, |
|
"eval_samples_per_second": 889.502, |
|
"eval_steps_per_second": 55.594, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 1.9256700277328491, |
|
"eval_runtime": 454.59, |
|
"eval_samples_per_second": 879.914, |
|
"eval_steps_per_second": 54.995, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.0634, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 1.9289941787719727, |
|
"eval_runtime": 450.5402, |
|
"eval_samples_per_second": 887.823, |
|
"eval_steps_per_second": 55.489, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_loss": 1.9266842603683472, |
|
"eval_runtime": 450.2964, |
|
"eval_samples_per_second": 888.304, |
|
"eval_steps_per_second": 55.519, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.0613, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 1.9295446872711182, |
|
"eval_runtime": 450.1243, |
|
"eval_samples_per_second": 888.643, |
|
"eval_steps_per_second": 55.54, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 1.9299752712249756, |
|
"eval_runtime": 450.6009, |
|
"eval_samples_per_second": 887.704, |
|
"eval_steps_per_second": 55.481, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.0599, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 1.932616114616394, |
|
"eval_runtime": 451.4436, |
|
"eval_samples_per_second": 886.047, |
|
"eval_steps_per_second": 55.378, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 1.931321144104004, |
|
"eval_runtime": 453.239, |
|
"eval_samples_per_second": 882.537, |
|
"eval_steps_per_second": 55.159, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.0592, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 1.9236520528793335, |
|
"eval_runtime": 450.6099, |
|
"eval_samples_per_second": 887.686, |
|
"eval_steps_per_second": 55.48, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_loss": 1.9271957874298096, |
|
"eval_runtime": 453.5158, |
|
"eval_samples_per_second": 881.998, |
|
"eval_steps_per_second": 55.125, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.0602, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 1.9260518550872803, |
|
"eval_runtime": 452.3583, |
|
"eval_samples_per_second": 884.255, |
|
"eval_steps_per_second": 55.266, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 1.9283181428909302, |
|
"eval_runtime": 451.1311, |
|
"eval_samples_per_second": 886.66, |
|
"eval_steps_per_second": 55.416, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.0575, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 1.929377555847168, |
|
"eval_runtime": 453.899, |
|
"eval_samples_per_second": 881.253, |
|
"eval_steps_per_second": 55.078, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_loss": 1.9283651113510132, |
|
"eval_runtime": 451.3036, |
|
"eval_samples_per_second": 886.321, |
|
"eval_steps_per_second": 55.395, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.0585, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 1.926291823387146, |
|
"eval_runtime": 451.1498, |
|
"eval_samples_per_second": 886.623, |
|
"eval_steps_per_second": 55.414, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 1.9226716756820679, |
|
"eval_runtime": 452.7662, |
|
"eval_samples_per_second": 883.458, |
|
"eval_steps_per_second": 55.216, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.0535, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 1.9251185655593872, |
|
"eval_runtime": 454.2127, |
|
"eval_samples_per_second": 880.645, |
|
"eval_steps_per_second": 55.04, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 1.9273473024368286, |
|
"eval_runtime": 452.8001, |
|
"eval_samples_per_second": 883.392, |
|
"eval_steps_per_second": 55.212, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.062, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 1.9241611957550049, |
|
"eval_runtime": 452.8827, |
|
"eval_samples_per_second": 883.231, |
|
"eval_steps_per_second": 55.202, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 1.9242278337478638, |
|
"eval_runtime": 453.7634, |
|
"eval_samples_per_second": 881.517, |
|
"eval_steps_per_second": 55.095, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.0606, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 1.925549030303955, |
|
"eval_runtime": 451.5199, |
|
"eval_samples_per_second": 885.897, |
|
"eval_steps_per_second": 55.369, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 1.9233018159866333, |
|
"eval_runtime": 453.1285, |
|
"eval_samples_per_second": 882.752, |
|
"eval_steps_per_second": 55.172, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.0565, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 1.9242709875106812, |
|
"eval_runtime": 451.6366, |
|
"eval_samples_per_second": 885.668, |
|
"eval_steps_per_second": 55.354, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 1.9271726608276367, |
|
"eval_runtime": 452.6063, |
|
"eval_samples_per_second": 883.77, |
|
"eval_steps_per_second": 55.236, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.0538, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 1.9308254718780518, |
|
"eval_runtime": 452.9886, |
|
"eval_samples_per_second": 883.025, |
|
"eval_steps_per_second": 55.189, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 1.923595905303955, |
|
"eval_runtime": 454.1743, |
|
"eval_samples_per_second": 880.719, |
|
"eval_steps_per_second": 55.045, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.0573, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 1.9245961904525757, |
|
"eval_runtime": 453.1057, |
|
"eval_samples_per_second": 882.796, |
|
"eval_steps_per_second": 55.175, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 1.9237180948257446, |
|
"eval_runtime": 453.1297, |
|
"eval_samples_per_second": 882.75, |
|
"eval_steps_per_second": 55.172, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.0562, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 1.9198731184005737, |
|
"eval_runtime": 453.0177, |
|
"eval_samples_per_second": 882.968, |
|
"eval_steps_per_second": 55.185, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 1.9235132932662964, |
|
"eval_runtime": 452.6084, |
|
"eval_samples_per_second": 883.766, |
|
"eval_steps_per_second": 55.235, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.0534, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 1.9209364652633667, |
|
"eval_runtime": 454.4995, |
|
"eval_samples_per_second": 880.089, |
|
"eval_steps_per_second": 55.006, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 1.9215258359909058, |
|
"eval_runtime": 454.5969, |
|
"eval_samples_per_second": 879.9, |
|
"eval_steps_per_second": 54.994, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.0567, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 1.9241678714752197, |
|
"eval_runtime": 452.6555, |
|
"eval_samples_per_second": 883.674, |
|
"eval_steps_per_second": 55.23, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 1.9272419214248657, |
|
"eval_runtime": 453.105, |
|
"eval_samples_per_second": 882.797, |
|
"eval_steps_per_second": 55.175, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.0592, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 1.9257162809371948, |
|
"eval_runtime": 456.0338, |
|
"eval_samples_per_second": 877.128, |
|
"eval_steps_per_second": 54.82, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 1.9228087663650513, |
|
"eval_runtime": 456.442, |
|
"eval_samples_per_second": 876.344, |
|
"eval_steps_per_second": 54.771, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.0599, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 1.9205422401428223, |
|
"eval_runtime": 455.2368, |
|
"eval_samples_per_second": 878.664, |
|
"eval_steps_per_second": 54.916, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 1.9189964532852173, |
|
"eval_runtime": 456.8477, |
|
"eval_samples_per_second": 875.565, |
|
"eval_steps_per_second": 54.723, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.0504, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 1.9240779876708984, |
|
"eval_runtime": 456.0663, |
|
"eval_samples_per_second": 877.065, |
|
"eval_steps_per_second": 54.817, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 1.9213923215866089, |
|
"eval_runtime": 454.1845, |
|
"eval_samples_per_second": 880.699, |
|
"eval_steps_per_second": 55.044, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.0541, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 1.9264706373214722, |
|
"eval_runtime": 454.4773, |
|
"eval_samples_per_second": 880.132, |
|
"eval_steps_per_second": 55.008, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 1.9250409603118896, |
|
"eval_runtime": 455.215, |
|
"eval_samples_per_second": 878.706, |
|
"eval_steps_per_second": 54.919, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.0581, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 1.9173840284347534, |
|
"eval_runtime": 455.628, |
|
"eval_samples_per_second": 877.909, |
|
"eval_steps_per_second": 54.869, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 1.923222303390503, |
|
"eval_runtime": 454.8929, |
|
"eval_samples_per_second": 879.328, |
|
"eval_steps_per_second": 54.958, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.057, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 1.92417311668396, |
|
"eval_runtime": 453.9991, |
|
"eval_samples_per_second": 881.059, |
|
"eval_steps_per_second": 55.066, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 1.920148491859436, |
|
"eval_runtime": 454.6117, |
|
"eval_samples_per_second": 879.872, |
|
"eval_steps_per_second": 54.992, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.0541, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 1.918699026107788, |
|
"eval_runtime": 454.7578, |
|
"eval_samples_per_second": 879.589, |
|
"eval_steps_per_second": 54.974, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 1.9204912185668945, |
|
"eval_runtime": 456.9789, |
|
"eval_samples_per_second": 875.314, |
|
"eval_steps_per_second": 54.707, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.0542, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 1.9177594184875488, |
|
"eval_runtime": 454.9485, |
|
"eval_samples_per_second": 879.22, |
|
"eval_steps_per_second": 54.951, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 1.923877477645874, |
|
"eval_runtime": 455.3795, |
|
"eval_samples_per_second": 878.388, |
|
"eval_steps_per_second": 54.899, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.0526, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 1.9184619188308716, |
|
"eval_runtime": 455.3342, |
|
"eval_samples_per_second": 878.476, |
|
"eval_steps_per_second": 54.905, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 1.9227157831192017, |
|
"eval_runtime": 455.665, |
|
"eval_samples_per_second": 877.838, |
|
"eval_steps_per_second": 54.865, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.0503, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 1.9222984313964844, |
|
"eval_runtime": 455.1314, |
|
"eval_samples_per_second": 878.867, |
|
"eval_steps_per_second": 54.929, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 1.9229977130889893, |
|
"eval_runtime": 456.6723, |
|
"eval_samples_per_second": 875.902, |
|
"eval_steps_per_second": 54.744, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.0579, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 1.9143402576446533, |
|
"eval_runtime": 459.6633, |
|
"eval_samples_per_second": 870.202, |
|
"eval_steps_per_second": 54.388, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 1.9187949895858765, |
|
"eval_runtime": 455.7334, |
|
"eval_samples_per_second": 877.706, |
|
"eval_steps_per_second": 54.857, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.0523, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 1.916965126991272, |
|
"eval_runtime": 460.7856, |
|
"eval_samples_per_second": 868.083, |
|
"eval_steps_per_second": 54.255, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 1.9252265691757202, |
|
"eval_runtime": 461.7538, |
|
"eval_samples_per_second": 866.263, |
|
"eval_steps_per_second": 54.141, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.056, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 1.9182839393615723, |
|
"eval_runtime": 459.9309, |
|
"eval_samples_per_second": 869.696, |
|
"eval_steps_per_second": 54.356, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 1.9236822128295898, |
|
"eval_runtime": 460.9421, |
|
"eval_samples_per_second": 867.788, |
|
"eval_steps_per_second": 54.237, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.0545, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 1.919832468032837, |
|
"eval_runtime": 459.7063, |
|
"eval_samples_per_second": 870.121, |
|
"eval_steps_per_second": 54.383, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 1.9224891662597656, |
|
"eval_runtime": 457.5552, |
|
"eval_samples_per_second": 874.212, |
|
"eval_steps_per_second": 54.638, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.0552, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 1.9172176122665405, |
|
"eval_runtime": 458.1875, |
|
"eval_samples_per_second": 873.005, |
|
"eval_steps_per_second": 54.563, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 1.9179189205169678, |
|
"eval_runtime": 460.0476, |
|
"eval_samples_per_second": 869.475, |
|
"eval_steps_per_second": 54.342, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.0571, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 1.9238101243972778, |
|
"eval_runtime": 457.847, |
|
"eval_samples_per_second": 873.654, |
|
"eval_steps_per_second": 54.603, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 1.9188734292984009, |
|
"eval_runtime": 461.2969, |
|
"eval_samples_per_second": 867.12, |
|
"eval_steps_per_second": 54.195, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.0637, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 1.9217091798782349, |
|
"eval_runtime": 460.9073, |
|
"eval_samples_per_second": 867.854, |
|
"eval_steps_per_second": 54.241, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 1.918999195098877, |
|
"eval_runtime": 461.2775, |
|
"eval_samples_per_second": 867.157, |
|
"eval_steps_per_second": 54.197, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.0554, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.9259402751922607, |
|
"eval_runtime": 460.6263, |
|
"eval_samples_per_second": 868.383, |
|
"eval_steps_per_second": 54.274, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 1.9184104204177856, |
|
"eval_runtime": 456.8649, |
|
"eval_samples_per_second": 875.532, |
|
"eval_steps_per_second": 54.721, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.0545, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 1.924387812614441, |
|
"eval_runtime": 460.2444, |
|
"eval_samples_per_second": 869.103, |
|
"eval_steps_per_second": 54.319, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 1.920113444328308, |
|
"eval_runtime": 458.5556, |
|
"eval_samples_per_second": 872.304, |
|
"eval_steps_per_second": 54.519, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.0538, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 1.9250693321228027, |
|
"eval_runtime": 458.1637, |
|
"eval_samples_per_second": 873.05, |
|
"eval_steps_per_second": 54.566, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 1.9216325283050537, |
|
"eval_runtime": 458.8324, |
|
"eval_samples_per_second": 871.778, |
|
"eval_steps_per_second": 54.486, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.058, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 1.9220870733261108, |
|
"eval_runtime": 457.9425, |
|
"eval_samples_per_second": 873.472, |
|
"eval_steps_per_second": 54.592, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 1.924748182296753, |
|
"eval_runtime": 459.3633, |
|
"eval_samples_per_second": 870.77, |
|
"eval_steps_per_second": 54.423, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.0482, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 1.9209040403366089, |
|
"eval_runtime": 458.2832, |
|
"eval_samples_per_second": 872.823, |
|
"eval_steps_per_second": 54.551, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": 1.9206820726394653, |
|
"eval_runtime": 458.0167, |
|
"eval_samples_per_second": 873.331, |
|
"eval_steps_per_second": 54.583, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.0528, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 1.9176584482192993, |
|
"eval_runtime": 460.0682, |
|
"eval_samples_per_second": 869.436, |
|
"eval_steps_per_second": 54.34, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 1.91410231590271, |
|
"eval_runtime": 460.3351, |
|
"eval_samples_per_second": 868.932, |
|
"eval_steps_per_second": 54.308, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.0529, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 1.921297550201416, |
|
"eval_runtime": 458.1751, |
|
"eval_samples_per_second": 873.029, |
|
"eval_steps_per_second": 54.564, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 1.9170490503311157, |
|
"eval_runtime": 459.8968, |
|
"eval_samples_per_second": 869.76, |
|
"eval_steps_per_second": 54.36, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.059, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 1.916093111038208, |
|
"eval_runtime": 460.0388, |
|
"eval_samples_per_second": 869.492, |
|
"eval_steps_per_second": 54.343, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 1.9164094924926758, |
|
"eval_runtime": 458.9188, |
|
"eval_samples_per_second": 871.614, |
|
"eval_steps_per_second": 54.476, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.056, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 1.9176851511001587, |
|
"eval_runtime": 458.5111, |
|
"eval_samples_per_second": 872.389, |
|
"eval_steps_per_second": 54.524, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 1.9188567399978638, |
|
"eval_runtime": 459.5705, |
|
"eval_samples_per_second": 870.378, |
|
"eval_steps_per_second": 54.399, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.058, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 1.9181184768676758, |
|
"eval_runtime": 461.1752, |
|
"eval_samples_per_second": 867.349, |
|
"eval_steps_per_second": 54.209, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 1.9213508367538452, |
|
"eval_runtime": 460.4826, |
|
"eval_samples_per_second": 868.654, |
|
"eval_steps_per_second": 54.291, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.0543, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 1.9136959314346313, |
|
"eval_runtime": 459.174, |
|
"eval_samples_per_second": 871.129, |
|
"eval_steps_per_second": 54.446, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 1.9180983304977417, |
|
"eval_runtime": 458.6403, |
|
"eval_samples_per_second": 872.143, |
|
"eval_steps_per_second": 54.509, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.0513, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 1.9187287092208862, |
|
"eval_runtime": 460.1583, |
|
"eval_samples_per_second": 869.266, |
|
"eval_steps_per_second": 54.329, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 1.917602300643921, |
|
"eval_runtime": 459.5652, |
|
"eval_samples_per_second": 870.388, |
|
"eval_steps_per_second": 54.399, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.0587, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 1.9144842624664307, |
|
"eval_runtime": 459.7836, |
|
"eval_samples_per_second": 869.974, |
|
"eval_steps_per_second": 54.373, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 1.9191675186157227, |
|
"eval_runtime": 461.323, |
|
"eval_samples_per_second": 867.071, |
|
"eval_steps_per_second": 54.192, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.053, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 1.9202276468276978, |
|
"eval_runtime": 461.1419, |
|
"eval_samples_per_second": 867.412, |
|
"eval_steps_per_second": 54.213, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 1.9182982444763184, |
|
"eval_runtime": 460.6765, |
|
"eval_samples_per_second": 868.288, |
|
"eval_steps_per_second": 54.268, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.0543, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 1.9163408279418945, |
|
"eval_runtime": 460.4522, |
|
"eval_samples_per_second": 868.711, |
|
"eval_steps_per_second": 54.294, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 1.9171490669250488, |
|
"eval_runtime": 461.2694, |
|
"eval_samples_per_second": 867.172, |
|
"eval_steps_per_second": 54.198, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.0492, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 1.918287754058838, |
|
"eval_runtime": 461.4172, |
|
"eval_samples_per_second": 866.894, |
|
"eval_steps_per_second": 54.181, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 1.9172468185424805, |
|
"eval_runtime": 462.6921, |
|
"eval_samples_per_second": 864.506, |
|
"eval_steps_per_second": 54.032, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.0505, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 1.919048547744751, |
|
"eval_runtime": 461.0722, |
|
"eval_samples_per_second": 867.543, |
|
"eval_steps_per_second": 54.221, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 1.9181376695632935, |
|
"eval_runtime": 462.8167, |
|
"eval_samples_per_second": 864.273, |
|
"eval_steps_per_second": 54.017, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.0548, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 1.916041612625122, |
|
"eval_runtime": 464.4737, |
|
"eval_samples_per_second": 861.19, |
|
"eval_steps_per_second": 53.824, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_loss": 1.9167762994766235, |
|
"eval_runtime": 461.6336, |
|
"eval_samples_per_second": 866.488, |
|
"eval_steps_per_second": 54.156, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.0524, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 1.9155387878417969, |
|
"eval_runtime": 462.6415, |
|
"eval_samples_per_second": 864.6, |
|
"eval_steps_per_second": 54.038, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 1.9161415100097656, |
|
"eval_runtime": 464.1686, |
|
"eval_samples_per_second": 861.756, |
|
"eval_steps_per_second": 53.86, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.0539, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 1.9188584089279175, |
|
"eval_runtime": 460.6378, |
|
"eval_samples_per_second": 868.361, |
|
"eval_steps_per_second": 54.273, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 1.916890263557434, |
|
"eval_runtime": 462.8071, |
|
"eval_samples_per_second": 864.291, |
|
"eval_steps_per_second": 54.018, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.0542, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 1.91769278049469, |
|
"eval_runtime": 465.0529, |
|
"eval_samples_per_second": 860.117, |
|
"eval_steps_per_second": 53.757, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 1.9140154123306274, |
|
"eval_runtime": 464.2522, |
|
"eval_samples_per_second": 861.601, |
|
"eval_steps_per_second": 53.85, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.0509, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 1.9151808023452759, |
|
"eval_runtime": 464.5418, |
|
"eval_samples_per_second": 861.064, |
|
"eval_steps_per_second": 53.816, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 1.9160494804382324, |
|
"eval_runtime": 461.3857, |
|
"eval_samples_per_second": 866.954, |
|
"eval_steps_per_second": 54.185, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.0507, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 1.9155534505844116, |
|
"eval_runtime": 462.7795, |
|
"eval_samples_per_second": 864.343, |
|
"eval_steps_per_second": 54.021, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 1.9139437675476074, |
|
"eval_runtime": 462.6977, |
|
"eval_samples_per_second": 864.495, |
|
"eval_steps_per_second": 54.031, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.057, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 1.913998007774353, |
|
"eval_runtime": 463.7624, |
|
"eval_samples_per_second": 862.511, |
|
"eval_steps_per_second": 53.907, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 1.924768090248108, |
|
"eval_runtime": 462.9309, |
|
"eval_samples_per_second": 864.06, |
|
"eval_steps_per_second": 54.004, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.0515, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 1.914266586303711, |
|
"eval_runtime": 464.2581, |
|
"eval_samples_per_second": 861.59, |
|
"eval_steps_per_second": 53.849, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 1.9187840223312378, |
|
"eval_runtime": 464.8964, |
|
"eval_samples_per_second": 860.407, |
|
"eval_steps_per_second": 53.775, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.0503, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 1.9127299785614014, |
|
"eval_runtime": 463.729, |
|
"eval_samples_per_second": 862.573, |
|
"eval_steps_per_second": 53.911, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 1.913232445716858, |
|
"eval_runtime": 463.083, |
|
"eval_samples_per_second": 863.776, |
|
"eval_steps_per_second": 53.986, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.0534, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 1.9128867387771606, |
|
"eval_runtime": 462.6549, |
|
"eval_samples_per_second": 864.575, |
|
"eval_steps_per_second": 54.036, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 1.9195021390914917, |
|
"eval_runtime": 465.694, |
|
"eval_samples_per_second": 858.933, |
|
"eval_steps_per_second": 53.683, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.0553, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 1.9157421588897705, |
|
"eval_runtime": 463.4557, |
|
"eval_samples_per_second": 863.081, |
|
"eval_steps_per_second": 53.943, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 1.9176615476608276, |
|
"eval_runtime": 465.2945, |
|
"eval_samples_per_second": 859.671, |
|
"eval_steps_per_second": 53.729, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.0496, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 1.9148454666137695, |
|
"eval_runtime": 466.7911, |
|
"eval_samples_per_second": 856.914, |
|
"eval_steps_per_second": 53.557, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 1.9132306575775146, |
|
"eval_runtime": 465.1058, |
|
"eval_samples_per_second": 860.019, |
|
"eval_steps_per_second": 53.751, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.0537, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 1.9183893203735352, |
|
"eval_runtime": 467.2319, |
|
"eval_samples_per_second": 856.106, |
|
"eval_steps_per_second": 53.507, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 1.916000485420227, |
|
"eval_runtime": 467.99, |
|
"eval_samples_per_second": 854.719, |
|
"eval_steps_per_second": 53.42, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.0505, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 1.9151054620742798, |
|
"eval_runtime": 466.6412, |
|
"eval_samples_per_second": 857.19, |
|
"eval_steps_per_second": 53.574, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 1.9209744930267334, |
|
"eval_runtime": 464.783, |
|
"eval_samples_per_second": 860.617, |
|
"eval_steps_per_second": 53.789, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.0536, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 1.91732656955719, |
|
"eval_runtime": 464.5366, |
|
"eval_samples_per_second": 861.073, |
|
"eval_steps_per_second": 53.817, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 1.913880467414856, |
|
"eval_runtime": 464.6612, |
|
"eval_samples_per_second": 860.842, |
|
"eval_steps_per_second": 53.803, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.0493, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 1.9209164381027222, |
|
"eval_runtime": 464.8966, |
|
"eval_samples_per_second": 860.406, |
|
"eval_steps_per_second": 53.775, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 1.915098786354065, |
|
"eval_runtime": 464.4023, |
|
"eval_samples_per_second": 861.322, |
|
"eval_steps_per_second": 53.833, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.052, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 1.9173537492752075, |
|
"eval_runtime": 467.4898, |
|
"eval_samples_per_second": 855.634, |
|
"eval_steps_per_second": 53.477, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 1.9146358966827393, |
|
"eval_runtime": 468.2303, |
|
"eval_samples_per_second": 854.28, |
|
"eval_steps_per_second": 53.393, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.0575, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 1.916880488395691, |
|
"eval_runtime": 468.5926, |
|
"eval_samples_per_second": 853.62, |
|
"eval_steps_per_second": 53.351, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 1.917311668395996, |
|
"eval_runtime": 472.4841, |
|
"eval_samples_per_second": 846.589, |
|
"eval_steps_per_second": 52.912, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.0499, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 1.9174522161483765, |
|
"eval_runtime": 469.1988, |
|
"eval_samples_per_second": 852.517, |
|
"eval_steps_per_second": 53.282, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 1.9136085510253906, |
|
"eval_runtime": 466.8287, |
|
"eval_samples_per_second": 856.845, |
|
"eval_steps_per_second": 53.553, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.0573, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 1.9158679246902466, |
|
"eval_runtime": 465.8052, |
|
"eval_samples_per_second": 858.728, |
|
"eval_steps_per_second": 53.671, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 1.9147690534591675, |
|
"eval_runtime": 466.5227, |
|
"eval_samples_per_second": 857.407, |
|
"eval_steps_per_second": 53.588, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.0556, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 1.9174126386642456, |
|
"eval_runtime": 469.2355, |
|
"eval_samples_per_second": 852.45, |
|
"eval_steps_per_second": 53.278, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 1.9146034717559814, |
|
"eval_runtime": 467.1331, |
|
"eval_samples_per_second": 856.287, |
|
"eval_steps_per_second": 53.518, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.0558, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 1.9152125120162964, |
|
"eval_runtime": 468.9591, |
|
"eval_samples_per_second": 852.953, |
|
"eval_steps_per_second": 53.31, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 1.9124705791473389, |
|
"eval_runtime": 465.7043, |
|
"eval_samples_per_second": 858.914, |
|
"eval_steps_per_second": 53.682, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.0493, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 1.915554165840149, |
|
"eval_runtime": 465.8577, |
|
"eval_samples_per_second": 858.631, |
|
"eval_steps_per_second": 53.664, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 1.9120720624923706, |
|
"eval_runtime": 469.0695, |
|
"eval_samples_per_second": 852.752, |
|
"eval_steps_per_second": 53.297, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.0492, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 1.9226574897766113, |
|
"eval_runtime": 468.1309, |
|
"eval_samples_per_second": 854.462, |
|
"eval_steps_per_second": 53.404, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 1.913604974746704, |
|
"eval_runtime": 468.2708, |
|
"eval_samples_per_second": 854.207, |
|
"eval_steps_per_second": 53.388, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.0576, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 1.9147344827651978, |
|
"eval_runtime": 467.7482, |
|
"eval_samples_per_second": 855.161, |
|
"eval_steps_per_second": 53.448, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 1.9158565998077393, |
|
"eval_runtime": 469.501, |
|
"eval_samples_per_second": 851.968, |
|
"eval_steps_per_second": 53.248, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.0512, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 1.9115842580795288, |
|
"eval_runtime": 468.26, |
|
"eval_samples_per_second": 854.226, |
|
"eval_steps_per_second": 53.389, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_loss": 1.9158958196640015, |
|
"eval_runtime": 468.1507, |
|
"eval_samples_per_second": 854.426, |
|
"eval_steps_per_second": 53.402, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.05, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 1.9130182266235352, |
|
"eval_runtime": 469.4638, |
|
"eval_samples_per_second": 852.036, |
|
"eval_steps_per_second": 53.252, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 1.9151562452316284, |
|
"eval_runtime": 467.3378, |
|
"eval_samples_per_second": 855.912, |
|
"eval_steps_per_second": 53.494, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.0437, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 1.9175945520401, |
|
"eval_runtime": 468.2097, |
|
"eval_samples_per_second": 854.318, |
|
"eval_steps_per_second": 53.395, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 1.9192650318145752, |
|
"eval_runtime": 468.4437, |
|
"eval_samples_per_second": 853.891, |
|
"eval_steps_per_second": 53.368, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.053, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 1.912379503250122, |
|
"eval_runtime": 471.2112, |
|
"eval_samples_per_second": 848.876, |
|
"eval_steps_per_second": 53.055, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 1.9138758182525635, |
|
"eval_runtime": 468.8194, |
|
"eval_samples_per_second": 853.207, |
|
"eval_steps_per_second": 53.325, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.0496, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 1.9128222465515137, |
|
"eval_runtime": 467.3919, |
|
"eval_samples_per_second": 855.813, |
|
"eval_steps_per_second": 53.488, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 1.9161826372146606, |
|
"eval_runtime": 468.6823, |
|
"eval_samples_per_second": 853.457, |
|
"eval_steps_per_second": 53.341, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.0495, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 1.9065207242965698, |
|
"eval_runtime": 469.3819, |
|
"eval_samples_per_second": 852.185, |
|
"eval_steps_per_second": 53.262, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 1.921074628829956, |
|
"eval_runtime": 469.081, |
|
"eval_samples_per_second": 852.731, |
|
"eval_steps_per_second": 53.296, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.0468, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 1.9094847440719604, |
|
"eval_runtime": 468.9812, |
|
"eval_samples_per_second": 852.913, |
|
"eval_steps_per_second": 53.307, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 1.9163267612457275, |
|
"eval_runtime": 468.7925, |
|
"eval_samples_per_second": 853.256, |
|
"eval_steps_per_second": 53.329, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.0507, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 1.9105992317199707, |
|
"eval_runtime": 471.2899, |
|
"eval_samples_per_second": 848.734, |
|
"eval_steps_per_second": 53.046, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 1.9164973497390747, |
|
"eval_runtime": 469.3289, |
|
"eval_samples_per_second": 852.281, |
|
"eval_steps_per_second": 53.268, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.0526, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 1.9179142713546753, |
|
"eval_runtime": 469.3963, |
|
"eval_samples_per_second": 852.158, |
|
"eval_steps_per_second": 53.26, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 1.9178262948989868, |
|
"eval_runtime": 473.0856, |
|
"eval_samples_per_second": 845.513, |
|
"eval_steps_per_second": 52.845, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.0537, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 1.9163259267807007, |
|
"eval_runtime": 469.0021, |
|
"eval_samples_per_second": 852.875, |
|
"eval_steps_per_second": 53.305, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 1.9158776998519897, |
|
"eval_runtime": 470.6196, |
|
"eval_samples_per_second": 849.943, |
|
"eval_steps_per_second": 53.121, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.0502, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 1.9146416187286377, |
|
"eval_runtime": 469.9739, |
|
"eval_samples_per_second": 851.111, |
|
"eval_steps_per_second": 53.194, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 1.916868805885315, |
|
"eval_runtime": 469.5771, |
|
"eval_samples_per_second": 851.83, |
|
"eval_steps_per_second": 53.239, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.0492, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 1.9164165258407593, |
|
"eval_runtime": 472.7938, |
|
"eval_samples_per_second": 846.035, |
|
"eval_steps_per_second": 52.877, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 1.915380835533142, |
|
"eval_runtime": 469.8644, |
|
"eval_samples_per_second": 851.309, |
|
"eval_steps_per_second": 53.207, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.0505, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 1.9065546989440918, |
|
"eval_runtime": 471.3395, |
|
"eval_samples_per_second": 848.645, |
|
"eval_steps_per_second": 53.04, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 1.9140393733978271, |
|
"eval_runtime": 469.7103, |
|
"eval_samples_per_second": 851.589, |
|
"eval_steps_per_second": 53.224, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.0516, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 1.9124903678894043, |
|
"eval_runtime": 471.0408, |
|
"eval_samples_per_second": 849.183, |
|
"eval_steps_per_second": 53.074, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 1.918357014656067, |
|
"eval_runtime": 471.1356, |
|
"eval_samples_per_second": 849.012, |
|
"eval_steps_per_second": 53.063, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.0559, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 1.9178434610366821, |
|
"eval_runtime": 471.2114, |
|
"eval_samples_per_second": 848.876, |
|
"eval_steps_per_second": 53.055, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 1.9164119958877563, |
|
"eval_runtime": 471.4668, |
|
"eval_samples_per_second": 848.416, |
|
"eval_steps_per_second": 53.026, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.0528, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 1.9086965322494507, |
|
"eval_runtime": 472.3709, |
|
"eval_samples_per_second": 846.792, |
|
"eval_steps_per_second": 52.925, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 1.9164988994598389, |
|
"eval_runtime": 472.0245, |
|
"eval_samples_per_second": 847.414, |
|
"eval_steps_per_second": 52.963, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.0559, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 1.9113143682479858, |
|
"eval_runtime": 472.9967, |
|
"eval_samples_per_second": 845.672, |
|
"eval_steps_per_second": 52.854, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 1.914596676826477, |
|
"eval_runtime": 474.1092, |
|
"eval_samples_per_second": 843.687, |
|
"eval_steps_per_second": 52.73, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.058, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 1.9156023263931274, |
|
"eval_runtime": 472.5333, |
|
"eval_samples_per_second": 846.501, |
|
"eval_steps_per_second": 52.906, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.9136894941329956, |
|
"eval_runtime": 476.1927, |
|
"eval_samples_per_second": 839.996, |
|
"eval_steps_per_second": 52.5, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.053, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 1.9081106185913086, |
|
"eval_runtime": 473.1832, |
|
"eval_samples_per_second": 845.339, |
|
"eval_steps_per_second": 52.834, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 1.9148355722427368, |
|
"eval_runtime": 472.3079, |
|
"eval_samples_per_second": 846.905, |
|
"eval_steps_per_second": 52.932, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0, |
|
"loss": 2.0566, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 1.9141788482666016, |
|
"eval_runtime": 473.7193, |
|
"eval_samples_per_second": 844.382, |
|
"eval_steps_per_second": 52.774, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"step": 2400000, |
|
"total_flos": 1.620927064872531e+18, |
|
"train_loss": 2.0669708544921876, |
|
"train_runtime": 368218.0367, |
|
"train_samples_per_second": 208.572, |
|
"train_steps_per_second": 6.518 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 11, |
|
"save_steps": 32000, |
|
"total_flos": 1.620927064872531e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|