|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 11.91962675448914, |
|
"global_step": 76000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.712319219376199e-06, |
|
"loss": 3.9787, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 3.446138381958008, |
|
"eval_runtime": 172.8011, |
|
"eval_samples_per_second": 65.578, |
|
"eval_steps_per_second": 16.395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7424638438752397e-05, |
|
"loss": 3.6486, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 3.351425886154175, |
|
"eval_runtime": 164.1827, |
|
"eval_samples_per_second": 69.021, |
|
"eval_steps_per_second": 17.255, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6136957658128598e-05, |
|
"loss": 3.5509, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 3.2961513996124268, |
|
"eval_runtime": 164.8921, |
|
"eval_samples_per_second": 68.724, |
|
"eval_steps_per_second": 17.181, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.4849276877504794e-05, |
|
"loss": 3.488, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 3.2645070552825928, |
|
"eval_runtime": 175.5668, |
|
"eval_samples_per_second": 64.545, |
|
"eval_steps_per_second": 16.136, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.356159609688099e-05, |
|
"loss": 3.4485, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 3.236461877822876, |
|
"eval_runtime": 172.1625, |
|
"eval_samples_per_second": 65.822, |
|
"eval_steps_per_second": 16.455, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.985484032435679e-05, |
|
"loss": 3.4183, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 3.213158130645752, |
|
"eval_runtime": 165.7563, |
|
"eval_samples_per_second": 68.365, |
|
"eval_steps_per_second": 17.091, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.9298672984727646e-05, |
|
"loss": 3.3437, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 3.199575185775757, |
|
"eval_runtime": 155.1508, |
|
"eval_samples_per_second": 73.039, |
|
"eval_steps_per_second": 18.26, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.87425056450985e-05, |
|
"loss": 3.2964, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 3.1807236671447754, |
|
"eval_runtime": 156.1445, |
|
"eval_samples_per_second": 72.574, |
|
"eval_steps_per_second": 18.143, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.8186338305469355e-05, |
|
"loss": 3.2813, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 3.1617496013641357, |
|
"eval_runtime": 169.4771, |
|
"eval_samples_per_second": 66.864, |
|
"eval_steps_per_second": 16.716, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.76301709658402e-05, |
|
"loss": 3.2586, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 3.14561128616333, |
|
"eval_runtime": 169.05, |
|
"eval_samples_per_second": 67.033, |
|
"eval_steps_per_second": 16.758, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.7074003626211057e-05, |
|
"loss": 3.248, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 3.1361846923828125, |
|
"eval_runtime": 165.9904, |
|
"eval_samples_per_second": 68.269, |
|
"eval_steps_per_second": 17.067, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.651783628658191e-05, |
|
"loss": 3.2413, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 3.1259961128234863, |
|
"eval_runtime": 167.4544, |
|
"eval_samples_per_second": 67.672, |
|
"eval_steps_per_second": 16.918, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.5961668946952765e-05, |
|
"loss": 3.1974, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 3.121722459793091, |
|
"eval_runtime": 167.3385, |
|
"eval_samples_per_second": 67.719, |
|
"eval_steps_per_second": 16.93, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.540550160732361e-05, |
|
"loss": 3.1014, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 3.1170709133148193, |
|
"eval_runtime": 166.2081, |
|
"eval_samples_per_second": 68.18, |
|
"eval_steps_per_second": 17.045, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.484933426769447e-05, |
|
"loss": 3.1037, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 3.1060075759887695, |
|
"eval_runtime": 163.3667, |
|
"eval_samples_per_second": 69.365, |
|
"eval_steps_per_second": 17.341, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.4293166928065315e-05, |
|
"loss": 3.1047, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 3.1029844284057617, |
|
"eval_runtime": 166.2417, |
|
"eval_samples_per_second": 68.166, |
|
"eval_steps_per_second": 17.041, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.3736999588436176e-05, |
|
"loss": 3.1004, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 3.0984201431274414, |
|
"eval_runtime": 156.2961, |
|
"eval_samples_per_second": 72.503, |
|
"eval_steps_per_second": 18.126, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.3180832248807023e-05, |
|
"loss": 3.0932, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 3.0867185592651367, |
|
"eval_runtime": 163.2041, |
|
"eval_samples_per_second": 69.435, |
|
"eval_steps_per_second": 17.359, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.262466490917788e-05, |
|
"loss": 3.0966, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 3.0811350345611572, |
|
"eval_runtime": 165.6977, |
|
"eval_samples_per_second": 68.39, |
|
"eval_steps_per_second": 17.097, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.2068497569548725e-05, |
|
"loss": 2.9921, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 3.089102029800415, |
|
"eval_runtime": 158.578, |
|
"eval_samples_per_second": 71.46, |
|
"eval_steps_per_second": 17.865, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.151233022991958e-05, |
|
"loss": 2.9753, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_loss": 3.0833535194396973, |
|
"eval_runtime": 156.4016, |
|
"eval_samples_per_second": 72.454, |
|
"eval_steps_per_second": 18.114, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.0956162890290434e-05, |
|
"loss": 2.9872, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_loss": 3.0813159942626953, |
|
"eval_runtime": 154.7129, |
|
"eval_samples_per_second": 73.245, |
|
"eval_steps_per_second": 18.311, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.039999555066128e-05, |
|
"loss": 2.9916, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 3.069411516189575, |
|
"eval_runtime": 157.7992, |
|
"eval_samples_per_second": 71.813, |
|
"eval_steps_per_second": 17.953, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.9843828211032136e-05, |
|
"loss": 2.9885, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_loss": 3.0638270378112793, |
|
"eval_runtime": 155.9919, |
|
"eval_samples_per_second": 72.645, |
|
"eval_steps_per_second": 18.161, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.928766087140299e-05, |
|
"loss": 2.9925, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 3.0645134449005127, |
|
"eval_runtime": 154.9825, |
|
"eval_samples_per_second": 73.118, |
|
"eval_steps_per_second": 18.279, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.8731493531773845e-05, |
|
"loss": 2.9338, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 3.072267770767212, |
|
"eval_runtime": 160.362, |
|
"eval_samples_per_second": 70.665, |
|
"eval_steps_per_second": 17.666, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.817532619214469e-05, |
|
"loss": 2.8808, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 3.0753729343414307, |
|
"eval_runtime": 154.306, |
|
"eval_samples_per_second": 73.439, |
|
"eval_steps_per_second": 18.36, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3.7619158852515547e-05, |
|
"loss": 2.8947, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 3.0727920532226562, |
|
"eval_runtime": 155.5666, |
|
"eval_samples_per_second": 72.843, |
|
"eval_steps_per_second": 18.211, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.7062991512886394e-05, |
|
"loss": 2.8972, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 3.0629560947418213, |
|
"eval_runtime": 155.1806, |
|
"eval_samples_per_second": 73.025, |
|
"eval_steps_per_second": 18.256, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 3.6506824173257255e-05, |
|
"loss": 2.8965, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_loss": 3.063514471054077, |
|
"eval_runtime": 154.5456, |
|
"eval_samples_per_second": 73.325, |
|
"eval_steps_per_second": 18.331, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 3.59506568336281e-05, |
|
"loss": 2.9067, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 3.055640697479248, |
|
"eval_runtime": 154.9311, |
|
"eval_samples_per_second": 73.142, |
|
"eval_steps_per_second": 18.286, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 3.539448949399896e-05, |
|
"loss": 2.8863, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 3.0722622871398926, |
|
"eval_runtime": 154.3548, |
|
"eval_samples_per_second": 73.415, |
|
"eval_steps_per_second": 18.354, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 3.4838322154369805e-05, |
|
"loss": 2.8005, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"eval_loss": 3.072225332260132, |
|
"eval_runtime": 168.3304, |
|
"eval_samples_per_second": 67.32, |
|
"eval_steps_per_second": 16.83, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 3.4282154814740666e-05, |
|
"loss": 2.8136, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_loss": 3.067979097366333, |
|
"eval_runtime": 164.2522, |
|
"eval_samples_per_second": 68.991, |
|
"eval_steps_per_second": 17.248, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 3.3725987475111513e-05, |
|
"loss": 2.8092, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 3.0639047622680664, |
|
"eval_runtime": 167.1507, |
|
"eval_samples_per_second": 67.795, |
|
"eval_steps_per_second": 16.949, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 3.316982013548237e-05, |
|
"loss": 2.8186, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_loss": 3.0562849044799805, |
|
"eval_runtime": 158.9079, |
|
"eval_samples_per_second": 71.312, |
|
"eval_steps_per_second": 17.828, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3.2613652795853215e-05, |
|
"loss": 2.8306, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_loss": 3.0535264015197754, |
|
"eval_runtime": 150.4043, |
|
"eval_samples_per_second": 75.344, |
|
"eval_steps_per_second": 18.836, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 3.205748545622407e-05, |
|
"loss": 2.8327, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 3.0540544986724854, |
|
"eval_runtime": 159.9431, |
|
"eval_samples_per_second": 70.85, |
|
"eval_steps_per_second": 17.713, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 3.1501318116594924e-05, |
|
"loss": 2.7548, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 3.068983554840088, |
|
"eval_runtime": 157.2439, |
|
"eval_samples_per_second": 72.066, |
|
"eval_steps_per_second": 18.017, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.094515077696578e-05, |
|
"loss": 2.7369, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 3.067845344543457, |
|
"eval_runtime": 151.1866, |
|
"eval_samples_per_second": 74.954, |
|
"eval_steps_per_second": 18.738, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.0388983437336626e-05, |
|
"loss": 2.7471, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 3.063314914703369, |
|
"eval_runtime": 150.8738, |
|
"eval_samples_per_second": 75.109, |
|
"eval_steps_per_second": 18.777, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 2.9832816097707477e-05, |
|
"loss": 2.7576, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 3.0629308223724365, |
|
"eval_runtime": 170.5929, |
|
"eval_samples_per_second": 66.427, |
|
"eval_steps_per_second": 16.607, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 2.9276648758078335e-05, |
|
"loss": 2.7566, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 3.0592966079711914, |
|
"eval_runtime": 152.4939, |
|
"eval_samples_per_second": 74.311, |
|
"eval_steps_per_second": 18.578, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 2.8720481418449186e-05, |
|
"loss": 2.7642, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_loss": 3.055969715118408, |
|
"eval_runtime": 152.0719, |
|
"eval_samples_per_second": 74.517, |
|
"eval_steps_per_second": 18.629, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 2.8164314078820037e-05, |
|
"loss": 2.7268, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_loss": 3.0751819610595703, |
|
"eval_runtime": 150.8723, |
|
"eval_samples_per_second": 75.11, |
|
"eval_steps_per_second": 18.777, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 2.7608146739190888e-05, |
|
"loss": 2.6778, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 3.0745816230773926, |
|
"eval_runtime": 150.773, |
|
"eval_samples_per_second": 75.159, |
|
"eval_steps_per_second": 18.79, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 2.705197939956174e-05, |
|
"loss": 2.6895, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 3.071889877319336, |
|
"eval_runtime": 151.4074, |
|
"eval_samples_per_second": 74.844, |
|
"eval_steps_per_second": 18.711, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 2.6495812059932596e-05, |
|
"loss": 2.6908, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_loss": 3.0682897567749023, |
|
"eval_runtime": 150.9084, |
|
"eval_samples_per_second": 75.092, |
|
"eval_steps_per_second": 18.773, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 2.5939644720303447e-05, |
|
"loss": 2.6985, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 3.06658935546875, |
|
"eval_runtime": 151.6999, |
|
"eval_samples_per_second": 74.7, |
|
"eval_steps_per_second": 18.675, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 2.5383477380674298e-05, |
|
"loss": 2.6969, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_loss": 3.0591042041778564, |
|
"eval_runtime": 161.1961, |
|
"eval_samples_per_second": 70.299, |
|
"eval_steps_per_second": 17.575, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.4827310041045153e-05, |
|
"loss": 2.7043, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.0611181259155273, |
|
"eval_runtime": 152.3504, |
|
"eval_samples_per_second": 74.381, |
|
"eval_steps_per_second": 18.595, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 2.4271142701416004e-05, |
|
"loss": 2.6225, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_loss": 3.080932378768921, |
|
"eval_runtime": 150.5999, |
|
"eval_samples_per_second": 75.246, |
|
"eval_steps_per_second": 18.811, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 2.3714975361786858e-05, |
|
"loss": 2.636, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"eval_loss": 3.0787675380706787, |
|
"eval_runtime": 152.1353, |
|
"eval_samples_per_second": 74.486, |
|
"eval_steps_per_second": 18.622, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 2.315880802215771e-05, |
|
"loss": 2.6379, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"eval_loss": 3.0813567638397217, |
|
"eval_runtime": 150.7408, |
|
"eval_samples_per_second": 75.175, |
|
"eval_steps_per_second": 18.794, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 2.260264068252856e-05, |
|
"loss": 2.6423, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"eval_loss": 3.0757715702056885, |
|
"eval_runtime": 151.7054, |
|
"eval_samples_per_second": 74.697, |
|
"eval_steps_per_second": 18.674, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 2.2046473342899414e-05, |
|
"loss": 2.6417, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_loss": 3.074010133743286, |
|
"eval_runtime": 151.6069, |
|
"eval_samples_per_second": 74.746, |
|
"eval_steps_per_second": 18.686, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 2.1490306003270265e-05, |
|
"loss": 2.6507, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 3.07344388961792, |
|
"eval_runtime": 158.2388, |
|
"eval_samples_per_second": 71.613, |
|
"eval_steps_per_second": 17.903, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 2.0934138663641116e-05, |
|
"loss": 2.6058, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_loss": 3.086090087890625, |
|
"eval_runtime": 151.6749, |
|
"eval_samples_per_second": 74.712, |
|
"eval_steps_per_second": 18.678, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 2.0377971324011967e-05, |
|
"loss": 2.5833, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"eval_loss": 3.086378812789917, |
|
"eval_runtime": 150.5584, |
|
"eval_samples_per_second": 75.266, |
|
"eval_steps_per_second": 18.817, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 1.982180398438282e-05, |
|
"loss": 2.5864, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"eval_loss": 3.081770896911621, |
|
"eval_runtime": 152.7908, |
|
"eval_samples_per_second": 74.167, |
|
"eval_steps_per_second": 18.542, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 1.9265636644753672e-05, |
|
"loss": 2.5952, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 3.0846848487854004, |
|
"eval_runtime": 158.7675, |
|
"eval_samples_per_second": 71.375, |
|
"eval_steps_per_second": 17.844, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 1.8709469305124527e-05, |
|
"loss": 2.6003, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 3.0796427726745605, |
|
"eval_runtime": 151.7749, |
|
"eval_samples_per_second": 74.663, |
|
"eval_steps_per_second": 18.666, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 1.8153301965495378e-05, |
|
"loss": 2.6024, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_loss": 3.076544761657715, |
|
"eval_runtime": 150.8625, |
|
"eval_samples_per_second": 75.115, |
|
"eval_steps_per_second": 18.779, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 1.7597134625866232e-05, |
|
"loss": 2.5883, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 3.0901451110839844, |
|
"eval_runtime": 150.7672, |
|
"eval_samples_per_second": 75.162, |
|
"eval_steps_per_second": 18.791, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 1.7040967286237083e-05, |
|
"loss": 2.5393, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"eval_loss": 3.0962793827056885, |
|
"eval_runtime": 158.5186, |
|
"eval_samples_per_second": 71.487, |
|
"eval_steps_per_second": 17.872, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 1.6484799946607937e-05, |
|
"loss": 2.5485, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"eval_loss": 3.0939271450042725, |
|
"eval_runtime": 150.6091, |
|
"eval_samples_per_second": 75.241, |
|
"eval_steps_per_second": 18.81, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 1.5928632606978788e-05, |
|
"loss": 2.5496, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"eval_loss": 3.092724084854126, |
|
"eval_runtime": 151.4272, |
|
"eval_samples_per_second": 74.835, |
|
"eval_steps_per_second": 18.709, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 1.5372465267349643e-05, |
|
"loss": 2.5577, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"eval_loss": 3.0966575145721436, |
|
"eval_runtime": 152.0136, |
|
"eval_samples_per_second": 74.546, |
|
"eval_steps_per_second": 18.636, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 1.4816297927720494e-05, |
|
"loss": 2.5598, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_loss": 3.091947078704834, |
|
"eval_runtime": 150.5612, |
|
"eval_samples_per_second": 75.265, |
|
"eval_steps_per_second": 18.816, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 1.4260130588091345e-05, |
|
"loss": 2.5623, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 3.090240955352783, |
|
"eval_runtime": 151.6176, |
|
"eval_samples_per_second": 74.741, |
|
"eval_steps_per_second": 18.685, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 1.3703963248462199e-05, |
|
"loss": 2.5138, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"eval_loss": 3.103158473968506, |
|
"eval_runtime": 150.096, |
|
"eval_samples_per_second": 75.498, |
|
"eval_steps_per_second": 18.875, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 1.314779590883305e-05, |
|
"loss": 2.5142, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_loss": 3.1014111042022705, |
|
"eval_runtime": 151.6463, |
|
"eval_samples_per_second": 74.727, |
|
"eval_steps_per_second": 18.682, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 1.2591628569203902e-05, |
|
"loss": 2.514, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"eval_loss": 3.1006739139556885, |
|
"eval_runtime": 158.2805, |
|
"eval_samples_per_second": 71.594, |
|
"eval_steps_per_second": 17.899, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 1.2035461229574755e-05, |
|
"loss": 2.5206, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 3.1010029315948486, |
|
"eval_runtime": 150.3118, |
|
"eval_samples_per_second": 75.39, |
|
"eval_steps_per_second": 18.847, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 1.1479293889945606e-05, |
|
"loss": 2.52, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"eval_loss": 3.098405361175537, |
|
"eval_runtime": 156.5194, |
|
"eval_samples_per_second": 72.4, |
|
"eval_steps_per_second": 18.1, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 1.0923126550316459e-05, |
|
"loss": 2.5204, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"eval_loss": 3.0984325408935547, |
|
"eval_runtime": 151.2164, |
|
"eval_samples_per_second": 74.939, |
|
"eval_steps_per_second": 18.735, |
|
"step": 76000 |
|
} |
|
], |
|
"max_steps": 95640, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.1223588676956365e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|