|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4,
      "learning_rate": 5e-05,
      "loss": 2.1914,
      "step": 100
    },
    {
      "epoch": 0.8,
      "learning_rate": 5e-05,
      "loss": 1.798,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6067341772151899,
      "eval_loss": 1.8213441371917725,
      "eval_runtime": 4.582,
      "eval_samples_per_second": 109.122,
      "eval_steps_per_second": 13.749,
      "step": 250
    },
    {
      "epoch": 1.2,
      "learning_rate": 5e-05,
      "loss": 1.7534,
      "step": 300
    },
    {
      "epoch": 1.6,
      "learning_rate": 5e-05,
      "loss": 1.7163,
      "step": 400
    },
    {
      "epoch": 2.0,
      "learning_rate": 5e-05,
      "loss": 1.7,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6077215189873417,
      "eval_loss": 1.8046445846557617,
      "eval_runtime": 5.1239,
      "eval_samples_per_second": 97.582,
      "eval_steps_per_second": 12.295,
      "step": 500
    },
    {
      "epoch": 2.4,
      "learning_rate": 5e-05,
      "loss": 1.5854,
      "step": 600
    },
    {
      "epoch": 2.8,
      "learning_rate": 5e-05,
      "loss": 1.5869,
      "step": 700
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6071392405063291,
      "eval_loss": 1.829284906387329,
      "eval_runtime": 5.0393,
      "eval_samples_per_second": 99.221,
      "eval_steps_per_second": 12.502,
      "step": 750
    },
    {
      "epoch": 3.2,
      "learning_rate": 5e-05,
      "loss": 1.5039,
      "step": 800
    },
    {
      "epoch": 3.6,
      "learning_rate": 5e-05,
      "loss": 1.4165,
      "step": 900
    },
    {
      "epoch": 4.0,
      "learning_rate": 5e-05,
      "loss": 1.4349,
      "step": 1000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6042784810126582,
      "eval_loss": 1.89736008644104,
      "eval_runtime": 4.5584,
      "eval_samples_per_second": 109.688,
      "eval_steps_per_second": 13.821,
      "step": 1000
    },
    {
      "epoch": 4.4,
      "learning_rate": 5e-05,
      "loss": 1.2624,
      "step": 1100
    },
    {
      "epoch": 4.8,
      "learning_rate": 5e-05,
      "loss": 1.3111,
      "step": 1200
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.601493670886076,
      "eval_loss": 1.9769378900527954,
      "eval_runtime": 4.4063,
      "eval_samples_per_second": 113.474,
      "eval_steps_per_second": 14.298,
      "step": 1250
    },
    {
      "epoch": 5.2,
      "learning_rate": 5e-05,
      "loss": 1.226,
      "step": 1300
    },
    {
      "epoch": 5.6,
      "learning_rate": 5e-05,
      "loss": 1.1608,
      "step": 1400
    },
    {
      "epoch": 6.0,
      "learning_rate": 5e-05,
      "loss": 1.197,
      "step": 1500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.599240506329114,
      "eval_loss": 2.0634872913360596,
      "eval_runtime": 4.8833,
      "eval_samples_per_second": 102.389,
      "eval_steps_per_second": 12.901,
      "step": 1500
    },
    {
      "epoch": 6.4,
      "learning_rate": 5e-05,
      "loss": 1.0336,
      "step": 1600
    },
    {
      "epoch": 6.8,
      "learning_rate": 5e-05,
      "loss": 1.0729,
      "step": 1700
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.5975443037974684,
      "eval_loss": 2.1523237228393555,
      "eval_runtime": 4.8877,
      "eval_samples_per_second": 102.297,
      "eval_steps_per_second": 12.889,
      "step": 1750
    },
    {
      "epoch": 7.2,
      "learning_rate": 5e-05,
      "loss": 1.0005,
      "step": 1800
    },
    {
      "epoch": 7.6,
      "learning_rate": 5e-05,
      "loss": 0.9437,
      "step": 1900
    },
    {
      "epoch": 8.0,
      "learning_rate": 5e-05,
      "loss": 0.9833,
      "step": 2000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5947341772151898,
      "eval_loss": 2.2639756202697754,
      "eval_runtime": 4.3964,
      "eval_samples_per_second": 113.73,
      "eval_steps_per_second": 14.33,
      "step": 2000
    },
    {
      "epoch": 8.4,
      "learning_rate": 5e-05,
      "loss": 0.823,
      "step": 2100
    },
    {
      "epoch": 8.8,
      "learning_rate": 5e-05,
      "loss": 0.8672,
      "step": 2200
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.5924050632911393,
      "eval_loss": 2.3642578125,
      "eval_runtime": 4.6579,
      "eval_samples_per_second": 107.345,
      "eval_steps_per_second": 13.526,
      "step": 2250
    },
    {
      "epoch": 9.2,
      "learning_rate": 5e-05,
      "loss": 0.8032,
      "step": 2300
    },
    {
      "epoch": 9.6,
      "learning_rate": 5e-05,
      "loss": 0.7566,
      "step": 2400
    },
    {
      "epoch": 10.0,
      "learning_rate": 5e-05,
      "loss": 0.7883,
      "step": 2500
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.5908101265822785,
      "eval_loss": 2.4598007202148438,
      "eval_runtime": 4.6476,
      "eval_samples_per_second": 107.582,
      "eval_steps_per_second": 13.555,
      "step": 2500
    },
    {
      "epoch": 10.4,
      "learning_rate": 5e-05,
      "loss": 0.6488,
      "step": 2600
    },
    {
      "epoch": 10.8,
      "learning_rate": 5e-05,
      "loss": 0.6879,
      "step": 2700
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.588987341772152,
      "eval_loss": 2.5668857097625732,
      "eval_runtime": 4.4085,
      "eval_samples_per_second": 113.416,
      "eval_steps_per_second": 14.29,
      "step": 2750
    },
    {
      "epoch": 11.2,
      "learning_rate": 5e-05,
      "loss": 0.6502,
      "step": 2800
    },
    {
      "epoch": 11.6,
      "learning_rate": 5e-05,
      "loss": 0.602,
      "step": 2900
    },
    {
      "epoch": 12.0,
      "learning_rate": 5e-05,
      "loss": 0.6295,
      "step": 3000
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5885316455696202,
      "eval_loss": 2.700011730194092,
      "eval_runtime": 4.8719,
      "eval_samples_per_second": 102.63,
      "eval_steps_per_second": 12.931,
      "step": 3000
    },
    {
      "epoch": 12.4,
      "learning_rate": 5e-05,
      "loss": 0.5228,
      "step": 3100
    },
    {
      "epoch": 12.8,
      "learning_rate": 5e-05,
      "loss": 0.5545,
      "step": 3200
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.5850886075949367,
      "eval_loss": 2.8281185626983643,
      "eval_runtime": 4.4033,
      "eval_samples_per_second": 113.551,
      "eval_steps_per_second": 14.307,
      "step": 3250
    },
    {
      "epoch": 13.2,
      "learning_rate": 5e-05,
      "loss": 0.5244,
      "step": 3300
    },
    {
      "epoch": 13.6,
      "learning_rate": 5e-05,
      "loss": 0.4924,
      "step": 3400
    },
    {
      "epoch": 14.0,
      "learning_rate": 5e-05,
      "loss": 0.5208,
      "step": 3500
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.5852911392405064,
      "eval_loss": 2.879397392272949,
      "eval_runtime": 4.703,
      "eval_samples_per_second": 106.315,
      "eval_steps_per_second": 13.396,
      "step": 3500
    },
    {
      "epoch": 14.4,
      "learning_rate": 5e-05,
      "loss": 0.4405,
      "step": 3600
    },
    {
      "epoch": 14.8,
      "learning_rate": 5e-05,
      "loss": 0.4679,
      "step": 3700
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.586253164556962,
      "eval_loss": 2.9183950424194336,
      "eval_runtime": 4.5629,
      "eval_samples_per_second": 109.579,
      "eval_steps_per_second": 13.807,
      "step": 3750
    },
    {
      "epoch": 15.2,
      "learning_rate": 5e-05,
      "loss": 0.4443,
      "step": 3800
    },
    {
      "epoch": 15.6,
      "learning_rate": 5e-05,
      "loss": 0.4235,
      "step": 3900
    },
    {
      "epoch": 16.0,
      "learning_rate": 5e-05,
      "loss": 0.4464,
      "step": 4000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5852151898734177,
      "eval_loss": 3.0790698528289795,
      "eval_runtime": 4.6021,
      "eval_samples_per_second": 108.647,
      "eval_steps_per_second": 13.689,
      "step": 4000
    },
    {
      "epoch": 16.4,
      "learning_rate": 5e-05,
      "loss": 0.3919,
      "step": 4100
    },
    {
      "epoch": 16.8,
      "learning_rate": 5e-05,
      "loss": 0.4136,
      "step": 4200
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.5855696202531645,
      "eval_loss": 3.0832109451293945,
      "eval_runtime": 5.1254,
      "eval_samples_per_second": 97.553,
      "eval_steps_per_second": 12.292,
      "step": 4250
    },
    {
      "epoch": 17.2,
      "learning_rate": 5e-05,
      "loss": 0.3902,
      "step": 4300
    },
    {
      "epoch": 17.6,
      "learning_rate": 5e-05,
      "loss": 0.3841,
      "step": 4400
    },
    {
      "epoch": 18.0,
      "learning_rate": 5e-05,
      "loss": 0.4021,
      "step": 4500
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.5846835443037974,
      "eval_loss": 3.0944228172302246,
      "eval_runtime": 4.5552,
      "eval_samples_per_second": 109.765,
      "eval_steps_per_second": 13.83,
      "step": 4500
    },
    {
      "epoch": 18.4,
      "learning_rate": 5e-05,
      "loss": 0.3577,
      "step": 4600
    },
    {
      "epoch": 18.8,
      "learning_rate": 5e-05,
      "loss": 0.3776,
      "step": 4700
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.5828354430379746,
      "eval_loss": 3.2120306491851807,
      "eval_runtime": 4.7023,
      "eval_samples_per_second": 106.332,
      "eval_steps_per_second": 13.398,
      "step": 4750
    },
    {
      "epoch": 19.2,
      "learning_rate": 5e-05,
      "loss": 0.3617,
      "step": 4800
    },
    {
      "epoch": 19.6,
      "learning_rate": 5e-05,
      "loss": 0.3575,
      "step": 4900
    },
    {
      "epoch": 20.0,
      "learning_rate": 5e-05,
      "loss": 0.373,
      "step": 5000
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.5839240506329114,
      "eval_loss": 3.229841947555542,
      "eval_runtime": 4.7155,
      "eval_samples_per_second": 106.033,
      "eval_steps_per_second": 13.36,
      "step": 5000
    },
    {
      "epoch": 20.0,
      "step": 5000,
      "total_flos": 3.1967425075347456e+17,
      "train_loss": 0.8756309669494629,
      "train_runtime": 3519.8357,
      "train_samples_per_second": 45.457,
      "train_steps_per_second": 1.421
    }
  ],
  "logging_steps": 100,
  "max_steps": 5000,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 3.1967425075347456e+17,
  "trial_name": null,
  "trial_params": null
}