{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 37815,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 4.9338886685177844e-05,
      "loss": 3.2991,
      "step": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.8677773370355686e-05,
      "loss": 2.7662,
      "step": 1000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.801666005553352e-05,
      "loss": 2.662,
      "step": 1500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.735554674071136e-05,
      "loss": 2.5305,
      "step": 2000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.66944334258892e-05,
      "loss": 2.4368,
      "step": 2500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.603332011106704e-05,
      "loss": 2.4286,
      "step": 3000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.5372206796244874e-05,
      "loss": 2.3404,
      "step": 3500
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.4711093481422716e-05,
      "loss": 2.2398,
      "step": 4000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.404998016660056e-05,
      "loss": 2.2966,
      "step": 4500
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.33888668517784e-05,
      "loss": 2.2255,
      "step": 5000
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.272775353695624e-05,
      "loss": 2.2713,
      "step": 5500
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.2066640222134076e-05,
      "loss": 2.2018,
      "step": 6000
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.140552690731192e-05,
      "loss": 2.1203,
      "step": 6500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.074441359248975e-05,
      "loss": 2.1148,
      "step": 7000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.0083300277667595e-05,
      "loss": 2.1149,
      "step": 7500
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.942218696284543e-05,
      "loss": 2.0219,
      "step": 8000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.876107364802327e-05,
      "loss": 2.0354,
      "step": 8500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.809996033320111e-05,
      "loss": 2.0341,
      "step": 9000
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.7438847018378955e-05,
      "loss": 1.9972,
      "step": 9500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.6777733703556796e-05,
      "loss": 1.9654,
      "step": 10000
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.611662038873463e-05,
      "loss": 1.9853,
      "step": 10500
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.5455507073912466e-05,
      "loss": 1.9487,
      "step": 11000
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.479439375909031e-05,
      "loss": 1.9498,
      "step": 11500
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.413328044426815e-05,
      "loss": 1.8963,
      "step": 12000
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.3472167129445985e-05,
      "loss": 1.9259,
      "step": 12500
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.2811053814623827e-05,
      "loss": 1.5878,
      "step": 13000
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.214994049980167e-05,
      "loss": 1.4017,
      "step": 13500
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.148882718497951e-05,
      "loss": 1.4809,
      "step": 14000
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.082771387015735e-05,
      "loss": 1.4646,
      "step": 14500
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.0166600555335183e-05,
      "loss": 1.5017,
      "step": 15000
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.9505487240513025e-05,
      "loss": 1.4745,
      "step": 15500
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.8844373925690867e-05,
      "loss": 1.4496,
      "step": 16000
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.8183260610868705e-05,
      "loss": 1.4599,
      "step": 16500
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.752214729604654e-05,
      "loss": 1.3974,
      "step": 17000
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.6861033981224382e-05,
      "loss": 1.397,
      "step": 17500
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.6199920666402224e-05,
      "loss": 1.436,
      "step": 18000
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5538807351580062e-05,
      "loss": 1.4359,
      "step": 18500
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.48776940367579e-05,
      "loss": 1.4215,
      "step": 19000
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.4216580721935742e-05,
      "loss": 1.3611,
      "step": 19500
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.355546740711358e-05,
      "loss": 1.4515,
      "step": 20000
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.289435409229142e-05,
      "loss": 1.3923,
      "step": 20500
    },
    {
      "epoch": 1.67,
      "learning_rate": 2.2233240777469257e-05,
      "loss": 1.3968,
      "step": 21000
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.15721274626471e-05,
      "loss": 1.4511,
      "step": 21500
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.091101414782494e-05,
      "loss": 1.3736,
      "step": 22000
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.024990083300278e-05,
      "loss": 1.4196,
      "step": 22500
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9588787518180617e-05,
      "loss": 1.4012,
      "step": 23000
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.8927674203358456e-05,
      "loss": 1.3995,
      "step": 23500
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.8266560888536297e-05,
      "loss": 1.3706,
      "step": 24000
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.7605447573714136e-05,
      "loss": 1.357,
      "step": 24500
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.6944334258891974e-05,
      "loss": 1.3894,
      "step": 25000
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6283220944069812e-05,
      "loss": 1.1266,
      "step": 25500
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5622107629247654e-05,
      "loss": 0.9034,
      "step": 26000
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.4960994314425494e-05,
      "loss": 0.88,
      "step": 26500
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.4299880999603333e-05,
      "loss": 0.9379,
      "step": 27000
    },
    {
      "epoch": 2.18,
      "learning_rate": 1.3638767684781173e-05,
      "loss": 0.9224,
      "step": 27500
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.2977654369959011e-05,
      "loss": 0.9108,
      "step": 28000
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.2316541055136851e-05,
      "loss": 0.9068,
      "step": 28500
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.1655427740314691e-05,
      "loss": 0.9158,
      "step": 29000
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.099431442549253e-05,
      "loss": 0.901,
      "step": 29500
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.033320111067037e-05,
      "loss": 0.8898,
      "step": 30000
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.67208779584821e-06,
      "loss": 0.9295,
      "step": 30500
    },
    {
      "epoch": 2.46,
      "learning_rate": 9.010974481026048e-06,
      "loss": 0.9325,
      "step": 31000
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.349861166203888e-06,
      "loss": 0.9357,
      "step": 31500
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.688747851381726e-06,
      "loss": 0.8832,
      "step": 32000
    },
    {
      "epoch": 2.58,
      "learning_rate": 7.027634536559567e-06,
      "loss": 0.9101,
      "step": 32500
    },
    {
      "epoch": 2.62,
      "learning_rate": 6.366521221737406e-06,
      "loss": 0.9018,
      "step": 33000
    },
    {
      "epoch": 2.66,
      "learning_rate": 5.7054079069152455e-06,
      "loss": 0.8886,
      "step": 33500
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.044294592093085e-06,
      "loss": 0.8771,
      "step": 34000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.383181277270925e-06,
      "loss": 0.8956,
      "step": 34500
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.7220679624487635e-06,
      "loss": 0.8586,
      "step": 35000
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.060954647626603e-06,
      "loss": 0.9039,
      "step": 35500
    },
    {
      "epoch": 2.86,
      "learning_rate": 2.3998413328044427e-06,
      "loss": 0.8817,
      "step": 36000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.7387280179822822e-06,
      "loss": 0.8601,
      "step": 36500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.0776147031601218e-06,
      "loss": 0.8837,
      "step": 37000
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.165013883379611e-07,
      "loss": 0.8894,
      "step": 37500
    },
    {
      "epoch": 3.0,
      "step": 37815,
      "total_flos": 9288563680542720.0,
      "train_loss": 1.5174339204652274,
      "train_runtime": 9956.4506,
      "train_samples_per_second": 15.192,
      "train_steps_per_second": 3.798
    }
  ],
  "logging_steps": 500,
  "max_steps": 37815,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 9288563680542720.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}