|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 57, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6941, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 3.1639, |
|
"eval_samples_per_second": 18.016, |
|
"eval_steps_per_second": 1.264, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6914, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 3.1507, |
|
"eval_samples_per_second": 18.091, |
|
"eval_steps_per_second": 1.27, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.997722274649974e-05, |
|
"loss": 2.6003, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.6015625, |
|
"eval_runtime": 3.124, |
|
"eval_samples_per_second": 18.246, |
|
"eval_steps_per_second": 1.28, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9908960159769243e-05, |
|
"loss": 2.5603, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.5703125, |
|
"eval_runtime": 3.1396, |
|
"eval_samples_per_second": 18.155, |
|
"eval_steps_per_second": 1.274, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9795419551040836e-05, |
|
"loss": 2.6072, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.06296749626002993, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 2.3207, |
|
"eval_samples_per_second": 24.561, |
|
"eval_steps_per_second": 1.724, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9636945739411533e-05, |
|
"loss": 2.5444, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.06283149734802121, |
|
"eval_loss": 2.546875, |
|
"eval_runtime": 3.1226, |
|
"eval_samples_per_second": 18.254, |
|
"eval_steps_per_second": 1.281, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.9434020004638757e-05, |
|
"loss": 2.4467, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.06292216328936036, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 2.1195, |
|
"eval_samples_per_second": 26.894, |
|
"eval_steps_per_second": 1.887, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9187258625509518e-05, |
|
"loss": 2.5452, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.06287683031869079, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 2.717, |
|
"eval_samples_per_second": 20.979, |
|
"eval_steps_per_second": 1.472, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.8897411008222026e-05, |
|
"loss": 2.6128, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.06308082868670384, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 2.9144, |
|
"eval_samples_per_second": 19.558, |
|
"eval_steps_per_second": 1.372, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.8565357410463664e-05, |
|
"loss": 2.4568, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.5390625, |
|
"eval_runtime": 3.1199, |
|
"eval_samples_per_second": 18.27, |
|
"eval_steps_per_second": 1.282, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.8192106268097336e-05, |
|
"loss": 2.5098, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.06278616437735164, |
|
"eval_loss": 2.53515625, |
|
"eval_runtime": 3.1268, |
|
"eval_samples_per_second": 18.23, |
|
"eval_steps_per_second": 1.279, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.7778791132574908e-05, |
|
"loss": 2.6047, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.0631261616573734, |
|
"eval_loss": 2.5234375, |
|
"eval_runtime": 3.1183, |
|
"eval_samples_per_second": 18.279, |
|
"eval_steps_per_second": 1.283, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.7326667228378677e-05, |
|
"loss": 2.5022, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.0630128292306995, |
|
"eval_loss": 2.515625, |
|
"eval_runtime": 2.5192, |
|
"eval_samples_per_second": 22.626, |
|
"eval_steps_per_second": 1.588, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6837107640945904e-05, |
|
"loss": 2.605, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.06326216056938211, |
|
"eval_loss": 2.5078125, |
|
"eval_runtime": 3.1482, |
|
"eval_samples_per_second": 18.105, |
|
"eval_steps_per_second": 1.271, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.6311599146653446e-05, |
|
"loss": 2.6055, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.06335282651072124, |
|
"eval_loss": 2.501953125, |
|
"eval_runtime": 3.116, |
|
"eval_samples_per_second": 18.293, |
|
"eval_steps_per_second": 1.284, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.575173769752677e-05, |
|
"loss": 2.5061, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.06323949408404733, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 2.7142, |
|
"eval_samples_per_second": 21.001, |
|
"eval_steps_per_second": 1.474, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.5159223574386117e-05, |
|
"loss": 2.4348, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.06310349517203863, |
|
"eval_loss": 2.490234375, |
|
"eval_runtime": 3.1245, |
|
"eval_samples_per_second": 18.243, |
|
"eval_steps_per_second": 1.28, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4535856223149525e-05, |
|
"loss": 2.6284, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.06319416111337776, |
|
"eval_loss": 2.48828125, |
|
"eval_runtime": 3.1284, |
|
"eval_samples_per_second": 18.22, |
|
"eval_steps_per_second": 1.279, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.3883528789974703e-05, |
|
"loss": 2.5574, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.06308082868670384, |
|
"eval_loss": 2.486328125, |
|
"eval_runtime": 2.5211, |
|
"eval_samples_per_second": 22.61, |
|
"eval_steps_per_second": 1.587, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.320422237183641e-05, |
|
"loss": 2.0814, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.0632848270547169, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 3.128, |
|
"eval_samples_per_second": 18.222, |
|
"eval_steps_per_second": 1.279, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.25e-05, |
|
"loss": 2.0636, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.06351149190806474, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 3.1237, |
|
"eval_samples_per_second": 18.248, |
|
"eval_steps_per_second": 1.281, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.9459, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.06351149190806474, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 2.9479, |
|
"eval_samples_per_second": 19.336, |
|
"eval_steps_per_second": 1.357, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.177300037466334e-05, |
|
"loss": 2.0527, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.48828125, |
|
"eval_runtime": 2.921, |
|
"eval_samples_per_second": 19.514, |
|
"eval_steps_per_second": 1.369, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.1025431369794546e-05, |
|
"loss": 1.8881, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.06348882542272995, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 3.1227, |
|
"eval_samples_per_second": 18.253, |
|
"eval_steps_per_second": 1.281, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.025956332789132e-05, |
|
"loss": 1.8668, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.06362482433473865, |
|
"eval_loss": 2.51171875, |
|
"eval_runtime": 3.1172, |
|
"eval_samples_per_second": 18.286, |
|
"eval_steps_per_second": 1.283, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.9477722165025422e-05, |
|
"loss": 2.0375, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.06357949136406908, |
|
"eval_loss": 2.529296875, |
|
"eval_runtime": 2.719, |
|
"eval_samples_per_second": 20.963, |
|
"eval_steps_per_second": 1.471, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.8682282307111988e-05, |
|
"loss": 1.9402, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.06319416111337776, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 3.1228, |
|
"eval_samples_per_second": 18.253, |
|
"eval_steps_per_second": 1.281, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.7875659478856077e-05, |
|
"loss": 1.6086, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.06333016002538647, |
|
"eval_loss": 2.55859375, |
|
"eval_runtime": 2.917, |
|
"eval_samples_per_second": 19.541, |
|
"eval_steps_per_second": 1.371, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.7060303367276123e-05, |
|
"loss": 1.8185, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.06319416111337776, |
|
"eval_loss": 2.564453125, |
|
"eval_runtime": 2.7052, |
|
"eval_samples_per_second": 21.07, |
|
"eval_steps_per_second": 1.479, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.623869018208499e-05, |
|
"loss": 1.7324, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.0630128292306995, |
|
"eval_loss": 2.560546875, |
|
"eval_runtime": 3.1208, |
|
"eval_samples_per_second": 18.264, |
|
"eval_steps_per_second": 1.282, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.5413315135522434e-05, |
|
"loss": 1.9285, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.06283149734802121, |
|
"eval_loss": 2.552734375, |
|
"eval_runtime": 3.1183, |
|
"eval_samples_per_second": 18.279, |
|
"eval_steps_per_second": 1.283, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.4586684864477572e-05, |
|
"loss": 1.8031, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.06305816220136906, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 2.9132, |
|
"eval_samples_per_second": 19.566, |
|
"eval_steps_per_second": 1.373, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.3761309817915017e-05, |
|
"loss": 1.7321, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.0630128292306995, |
|
"eval_loss": 2.53515625, |
|
"eval_runtime": 2.7189, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 1.471, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2939696632723877e-05, |
|
"loss": 1.7802, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.0631488281427082, |
|
"eval_loss": 2.525390625, |
|
"eval_runtime": 3.1376, |
|
"eval_samples_per_second": 18.167, |
|
"eval_steps_per_second": 1.275, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.2124340521143929e-05, |
|
"loss": 2.0637, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.06321682759871254, |
|
"eval_loss": 2.515625, |
|
"eval_runtime": 3.126, |
|
"eval_samples_per_second": 18.234, |
|
"eval_steps_per_second": 1.28, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1317717692888014e-05, |
|
"loss": 1.8159, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.0632848270547169, |
|
"eval_loss": 2.5078125, |
|
"eval_runtime": 2.5304, |
|
"eval_samples_per_second": 22.526, |
|
"eval_steps_per_second": 1.581, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0522277834974586e-05, |
|
"loss": 1.7142, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.50390625, |
|
"eval_runtime": 2.9235, |
|
"eval_samples_per_second": 19.497, |
|
"eval_steps_per_second": 1.368, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.740436672108686e-06, |
|
"loss": 1.8793, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.06337549299605603, |
|
"eval_loss": 2.5, |
|
"eval_runtime": 3.1269, |
|
"eval_samples_per_second": 18.229, |
|
"eval_steps_per_second": 1.279, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 8.974568630205462e-06, |
|
"loss": 1.6914, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.06357949136406908, |
|
"eval_loss": 2.501953125, |
|
"eval_runtime": 2.3189, |
|
"eval_samples_per_second": 24.581, |
|
"eval_steps_per_second": 1.725, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.226999625336663e-06, |
|
"loss": 1.411, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.06389682215875607, |
|
"eval_loss": 2.50390625, |
|
"eval_runtime": 3.1258, |
|
"eval_samples_per_second": 18.235, |
|
"eval_steps_per_second": 1.28, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.500000000000004e-06, |
|
"loss": 1.4182, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_accuracy": 0.0638514891880865, |
|
"eval_loss": 2.509765625, |
|
"eval_runtime": 3.1169, |
|
"eval_samples_per_second": 18.287, |
|
"eval_steps_per_second": 1.283, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 6.795777628163599e-06, |
|
"loss": 1.6223, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.06378348973208214, |
|
"eval_loss": 2.517578125, |
|
"eval_runtime": 2.7171, |
|
"eval_samples_per_second": 20.978, |
|
"eval_steps_per_second": 1.472, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 6.116471210025302e-06, |
|
"loss": 1.623, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.52734375, |
|
"eval_runtime": 2.7121, |
|
"eval_samples_per_second": 21.017, |
|
"eval_steps_per_second": 1.475, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 5.464143776850483e-06, |
|
"loss": 1.5748, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.06344349245206038, |
|
"eval_loss": 2.537109375, |
|
"eval_runtime": 2.9216, |
|
"eval_samples_per_second": 19.51, |
|
"eval_steps_per_second": 1.369, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.840776425613887e-06, |
|
"loss": 1.7166, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.0631488281427082, |
|
"eval_loss": 2.546875, |
|
"eval_runtime": 3.1159, |
|
"eval_samples_per_second": 18.293, |
|
"eval_steps_per_second": 1.284, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.248262302473233e-06, |
|
"loss": 1.3432, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.0629901627453647, |
|
"eval_loss": 2.556640625, |
|
"eval_runtime": 2.9081, |
|
"eval_samples_per_second": 19.6, |
|
"eval_steps_per_second": 1.375, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.688400853346558e-06, |
|
"loss": 1.5325, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.06305816220136906, |
|
"eval_loss": 2.564453125, |
|
"eval_runtime": 2.9197, |
|
"eval_samples_per_second": 19.522, |
|
"eval_steps_per_second": 1.37, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.162892359054098e-06, |
|
"loss": 1.5076, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.062854163833356, |
|
"eval_loss": 2.572265625, |
|
"eval_runtime": 3.1077, |
|
"eval_samples_per_second": 18.341, |
|
"eval_steps_per_second": 1.287, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.673332771621324e-06, |
|
"loss": 1.6636, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.06274083140668207, |
|
"eval_loss": 2.578125, |
|
"eval_runtime": 2.5045, |
|
"eval_samples_per_second": 22.759, |
|
"eval_steps_per_second": 1.597, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.221208867425096e-06, |
|
"loss": 1.2897, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.06267283195067773, |
|
"eval_loss": 2.583984375, |
|
"eval_runtime": 3.1236, |
|
"eval_samples_per_second": 18.248, |
|
"eval_steps_per_second": 1.281, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.8078937319026655e-06, |
|
"loss": 1.4559, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.587890625, |
|
"eval_runtime": 2.523, |
|
"eval_samples_per_second": 22.592, |
|
"eval_steps_per_second": 1.585, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.4346425895363385e-06, |
|
"loss": 1.3904, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_accuracy": 0.06267283195067773, |
|
"eval_loss": 2.58984375, |
|
"eval_runtime": 3.1182, |
|
"eval_samples_per_second": 18.28, |
|
"eval_steps_per_second": 1.283, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.1025889917779735e-06, |
|
"loss": 1.4961, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.0625594995240038, |
|
"eval_loss": 2.591796875, |
|
"eval_runtime": 2.3139, |
|
"eval_samples_per_second": 24.633, |
|
"eval_steps_per_second": 1.729, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 8.127413744904805e-07, |
|
"loss": 1.5276, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.06253683303866903, |
|
"eval_loss": 2.59375, |
|
"eval_runtime": 2.5424, |
|
"eval_samples_per_second": 22.42, |
|
"eval_steps_per_second": 1.573, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.659799953612438e-07, |
|
"loss": 1.3479, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.062468833582664675, |
|
"eval_loss": 2.595703125, |
|
"eval_runtime": 3.124, |
|
"eval_samples_per_second": 18.246, |
|
"eval_steps_per_second": 1.28, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.630542605884657e-07, |
|
"loss": 1.4094, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.06242350061199511, |
|
"eval_loss": 2.595703125, |
|
"eval_runtime": 2.3282, |
|
"eval_samples_per_second": 24.482, |
|
"eval_steps_per_second": 1.718, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.0458044895916516e-07, |
|
"loss": 1.5486, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.06235550115599075, |
|
"eval_loss": 2.595703125, |
|
"eval_runtime": 2.9026, |
|
"eval_samples_per_second": 19.637, |
|
"eval_steps_per_second": 1.378, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 57, |
|
"total_flos": 3114896719872.0, |
|
"train_loss": 1.9830793414199561, |
|
"train_runtime": 617.3135, |
|
"train_samples_per_second": 1.473, |
|
"train_steps_per_second": 0.092 |
|
} |
|
], |
|
"max_steps": 57, |
|
"num_train_epochs": 3, |
|
"total_flos": 3114896719872.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|