|
{ |
|
"best_metric": 2.396937131881714, |
|
"best_model_checkpoint": "./model_fine-tune/glot/xlm-r/jav-Latn/checkpoint-6000", |
|
"epoch": 20.338983050847457, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 4.60462760925293, |
|
"learning_rate": 9.95e-05, |
|
"loss": 1.8563, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"eval_accuracy": 0.7129037313539158, |
|
"eval_loss": 2.615060329437256, |
|
"eval_runtime": 16.1415, |
|
"eval_samples_per_second": 261.934, |
|
"eval_steps_per_second": 8.24, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"grad_norm": 4.434678077697754, |
|
"learning_rate": 9.900000000000001e-05, |
|
"loss": 1.6006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.389830508474576, |
|
"eval_accuracy": 0.7279286229632328, |
|
"eval_loss": 2.511897563934326, |
|
"eval_runtime": 16.7992, |
|
"eval_samples_per_second": 251.678, |
|
"eval_steps_per_second": 7.917, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"grad_norm": 3.77703595161438, |
|
"learning_rate": 9.850000000000001e-05, |
|
"loss": 1.4826, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.084745762711864, |
|
"eval_accuracy": 0.739675804970421, |
|
"eval_loss": 2.5077760219573975, |
|
"eval_runtime": 17.3002, |
|
"eval_samples_per_second": 244.39, |
|
"eval_steps_per_second": 7.688, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"grad_norm": 3.7486958503723145, |
|
"learning_rate": 9.8e-05, |
|
"loss": 1.4095, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.779661016949152, |
|
"eval_accuracy": 0.742510144555458, |
|
"eval_loss": 2.4969942569732666, |
|
"eval_runtime": 17.6233, |
|
"eval_samples_per_second": 239.91, |
|
"eval_steps_per_second": 7.547, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.474576271186441, |
|
"grad_norm": 3.9276657104492188, |
|
"learning_rate": 9.75e-05, |
|
"loss": 1.3522, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.474576271186441, |
|
"eval_accuracy": 0.7483269445491463, |
|
"eval_loss": 2.483748197555542, |
|
"eval_runtime": 18.2776, |
|
"eval_samples_per_second": 231.322, |
|
"eval_steps_per_second": 7.277, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.169491525423728, |
|
"grad_norm": 3.0509426593780518, |
|
"learning_rate": 9.7e-05, |
|
"loss": 1.3055, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.169491525423728, |
|
"eval_accuracy": 0.7534293503861141, |
|
"eval_loss": 2.4522793292999268, |
|
"eval_runtime": 17.8658, |
|
"eval_samples_per_second": 236.654, |
|
"eval_steps_per_second": 7.444, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.864406779661017, |
|
"grad_norm": 3.749621629714966, |
|
"learning_rate": 9.65e-05, |
|
"loss": 1.2637, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.864406779661017, |
|
"eval_accuracy": 0.7564406320026262, |
|
"eval_loss": 2.5035338401794434, |
|
"eval_runtime": 16.226, |
|
"eval_samples_per_second": 260.569, |
|
"eval_steps_per_second": 8.197, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.559322033898304, |
|
"grad_norm": 3.407952308654785, |
|
"learning_rate": 9.6e-05, |
|
"loss": 1.2289, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.559322033898304, |
|
"eval_accuracy": 0.7572301628520813, |
|
"eval_loss": 2.5971286296844482, |
|
"eval_runtime": 18.6487, |
|
"eval_samples_per_second": 226.718, |
|
"eval_steps_per_second": 7.132, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.254237288135593, |
|
"grad_norm": 3.5328588485717773, |
|
"learning_rate": 9.55e-05, |
|
"loss": 1.2005, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.254237288135593, |
|
"eval_accuracy": 0.760487028966383, |
|
"eval_loss": 2.424844980239868, |
|
"eval_runtime": 18.0059, |
|
"eval_samples_per_second": 234.812, |
|
"eval_steps_per_second": 7.386, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.949152542372882, |
|
"grad_norm": 3.54754638671875, |
|
"learning_rate": 9.5e-05, |
|
"loss": 1.1669, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.949152542372882, |
|
"eval_accuracy": 0.7611065918853734, |
|
"eval_loss": 2.494778871536255, |
|
"eval_runtime": 18.045, |
|
"eval_samples_per_second": 234.304, |
|
"eval_steps_per_second": 7.37, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.64406779661017, |
|
"grad_norm": 3.227668046951294, |
|
"learning_rate": 9.449999999999999e-05, |
|
"loss": 1.1376, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.64406779661017, |
|
"eval_accuracy": 0.7634028732398379, |
|
"eval_loss": 2.465242862701416, |
|
"eval_runtime": 17.9581, |
|
"eval_samples_per_second": 235.437, |
|
"eval_steps_per_second": 7.406, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 20.338983050847457, |
|
"grad_norm": 3.670949697494507, |
|
"learning_rate": 9.4e-05, |
|
"loss": 1.1196, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.338983050847457, |
|
"eval_accuracy": 0.7678996847315939, |
|
"eval_loss": 2.396937131881714, |
|
"eval_runtime": 17.9579, |
|
"eval_samples_per_second": 235.439, |
|
"eval_steps_per_second": 7.406, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 100000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 339, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.066477747463782e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|