|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.7359781121751023, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2598, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2292, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1966, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2014, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1835, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2463, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2303, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1892, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1961, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1219, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1738, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1226, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1246, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1479, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2111, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1836, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1366, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1682, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1431, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1472, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0244, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1281, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0264, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1049, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1054, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0381, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0953, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1273, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0747, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1288, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0656, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0525, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0002, |
|
"loss": 1.057, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1001, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1505, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0715, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9598, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.986, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 0.991, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9805, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9839, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9978, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9476, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9543, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9569, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0002, |
|
"loss": 1.002, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0002, |
|
"loss": 1.012, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0095, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9938, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 3, |
|
"total_flos": 8.459308833806746e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|