xlm-r_jav-latn / trainer_state.json
DGurgurov's picture
Uploading checkpoint-6000 for xlm-r - jav-latn
cbc620b verified
{
"best_metric": 2.396937131881714,
"best_model_checkpoint": "./model_fine-tune/glot/xlm-r/jav-Latn/checkpoint-6000",
"epoch": 20.338983050847457,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.694915254237288,
"grad_norm": 4.60462760925293,
"learning_rate": 9.95e-05,
"loss": 1.8563,
"step": 500
},
{
"epoch": 1.694915254237288,
"eval_accuracy": 0.7129037313539158,
"eval_loss": 2.615060329437256,
"eval_runtime": 16.1415,
"eval_samples_per_second": 261.934,
"eval_steps_per_second": 8.24,
"step": 500
},
{
"epoch": 3.389830508474576,
"grad_norm": 4.434678077697754,
"learning_rate": 9.900000000000001e-05,
"loss": 1.6006,
"step": 1000
},
{
"epoch": 3.389830508474576,
"eval_accuracy": 0.7279286229632328,
"eval_loss": 2.511897563934326,
"eval_runtime": 16.7992,
"eval_samples_per_second": 251.678,
"eval_steps_per_second": 7.917,
"step": 1000
},
{
"epoch": 5.084745762711864,
"grad_norm": 3.77703595161438,
"learning_rate": 9.850000000000001e-05,
"loss": 1.4826,
"step": 1500
},
{
"epoch": 5.084745762711864,
"eval_accuracy": 0.739675804970421,
"eval_loss": 2.5077760219573975,
"eval_runtime": 17.3002,
"eval_samples_per_second": 244.39,
"eval_steps_per_second": 7.688,
"step": 1500
},
{
"epoch": 6.779661016949152,
"grad_norm": 3.7486958503723145,
"learning_rate": 9.8e-05,
"loss": 1.4095,
"step": 2000
},
{
"epoch": 6.779661016949152,
"eval_accuracy": 0.742510144555458,
"eval_loss": 2.4969942569732666,
"eval_runtime": 17.6233,
"eval_samples_per_second": 239.91,
"eval_steps_per_second": 7.547,
"step": 2000
},
{
"epoch": 8.474576271186441,
"grad_norm": 3.9276657104492188,
"learning_rate": 9.75e-05,
"loss": 1.3522,
"step": 2500
},
{
"epoch": 8.474576271186441,
"eval_accuracy": 0.7483269445491463,
"eval_loss": 2.483748197555542,
"eval_runtime": 18.2776,
"eval_samples_per_second": 231.322,
"eval_steps_per_second": 7.277,
"step": 2500
},
{
"epoch": 10.169491525423728,
"grad_norm": 3.0509426593780518,
"learning_rate": 9.7e-05,
"loss": 1.3055,
"step": 3000
},
{
"epoch": 10.169491525423728,
"eval_accuracy": 0.7534293503861141,
"eval_loss": 2.4522793292999268,
"eval_runtime": 17.8658,
"eval_samples_per_second": 236.654,
"eval_steps_per_second": 7.444,
"step": 3000
},
{
"epoch": 11.864406779661017,
"grad_norm": 3.749621629714966,
"learning_rate": 9.65e-05,
"loss": 1.2637,
"step": 3500
},
{
"epoch": 11.864406779661017,
"eval_accuracy": 0.7564406320026262,
"eval_loss": 2.5035338401794434,
"eval_runtime": 16.226,
"eval_samples_per_second": 260.569,
"eval_steps_per_second": 8.197,
"step": 3500
},
{
"epoch": 13.559322033898304,
"grad_norm": 3.407952308654785,
"learning_rate": 9.6e-05,
"loss": 1.2289,
"step": 4000
},
{
"epoch": 13.559322033898304,
"eval_accuracy": 0.7572301628520813,
"eval_loss": 2.5971286296844482,
"eval_runtime": 18.6487,
"eval_samples_per_second": 226.718,
"eval_steps_per_second": 7.132,
"step": 4000
},
{
"epoch": 15.254237288135593,
"grad_norm": 3.5328588485717773,
"learning_rate": 9.55e-05,
"loss": 1.2005,
"step": 4500
},
{
"epoch": 15.254237288135593,
"eval_accuracy": 0.760487028966383,
"eval_loss": 2.424844980239868,
"eval_runtime": 18.0059,
"eval_samples_per_second": 234.812,
"eval_steps_per_second": 7.386,
"step": 4500
},
{
"epoch": 16.949152542372882,
"grad_norm": 3.54754638671875,
"learning_rate": 9.5e-05,
"loss": 1.1669,
"step": 5000
},
{
"epoch": 16.949152542372882,
"eval_accuracy": 0.7611065918853734,
"eval_loss": 2.494778871536255,
"eval_runtime": 18.045,
"eval_samples_per_second": 234.304,
"eval_steps_per_second": 7.37,
"step": 5000
},
{
"epoch": 18.64406779661017,
"grad_norm": 3.227668046951294,
"learning_rate": 9.449999999999999e-05,
"loss": 1.1376,
"step": 5500
},
{
"epoch": 18.64406779661017,
"eval_accuracy": 0.7634028732398379,
"eval_loss": 2.465242862701416,
"eval_runtime": 17.9581,
"eval_samples_per_second": 235.437,
"eval_steps_per_second": 7.406,
"step": 5500
},
{
"epoch": 20.338983050847457,
"grad_norm": 3.670949697494507,
"learning_rate": 9.4e-05,
"loss": 1.1196,
"step": 6000
},
{
"epoch": 20.338983050847457,
"eval_accuracy": 0.7678996847315939,
"eval_loss": 2.396937131881714,
"eval_runtime": 17.9579,
"eval_samples_per_second": 235.439,
"eval_steps_per_second": 7.406,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 339,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.066477747463782e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}