mDeBERTa-v3-base-Assamese / trainer_state.json
pritamdeka's picture
Upload folder using huggingface_hub
72eacba verified
{
"best_metric": 0.8285950217539291,
"best_model_checkpoint": "/content/temp_assamese/checkpoint-50000",
"epoch": 1.0,
"eval_steps": 5000,
"global_step": 53713,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09308733453726287,
"grad_norm": 5.457111358642578,
"learning_rate": 4.5348425893172976e-05,
"loss": 2.4466,
"step": 5000
},
{
"epoch": 0.09308733453726287,
"eval_accuracy": 0.707468341537135,
"eval_loss": 1.5004358291625977,
"eval_runtime": 308.6127,
"eval_samples_per_second": 146.822,
"eval_steps_per_second": 9.177,
"step": 5000
},
{
"epoch": 0.18617466907452573,
"grad_norm": 4.776674270629883,
"learning_rate": 4.0694990039655205e-05,
"loss": 1.4994,
"step": 10000
},
{
"epoch": 0.18617466907452573,
"eval_accuracy": 0.7532453087820641,
"eval_loss": 1.2256046533584595,
"eval_runtime": 306.864,
"eval_samples_per_second": 147.658,
"eval_steps_per_second": 9.229,
"step": 10000
},
{
"epoch": 0.2792620036117886,
"grad_norm": 3.9309194087982178,
"learning_rate": 3.604248505948281e-05,
"loss": 1.2888,
"step": 15000
},
{
"epoch": 0.2792620036117886,
"eval_accuracy": 0.7765668816186476,
"eval_loss": 1.099416732788086,
"eval_runtime": 321.8088,
"eval_samples_per_second": 140.801,
"eval_steps_per_second": 8.8,
"step": 15000
},
{
"epoch": 0.37234933814905147,
"grad_norm": 3.988945722579956,
"learning_rate": 3.1389980079310413e-05,
"loss": 1.1746,
"step": 20000
},
{
"epoch": 0.37234933814905147,
"eval_accuracy": 0.791549800931217,
"eval_loss": 1.0090231895446777,
"eval_runtime": 316.5344,
"eval_samples_per_second": 143.147,
"eval_steps_per_second": 8.947,
"step": 20000
},
{
"epoch": 0.4654366726863143,
"grad_norm": 4.230010509490967,
"learning_rate": 2.6736544225792642e-05,
"loss": 1.0994,
"step": 25000
},
{
"epoch": 0.4654366726863143,
"eval_accuracy": 0.8021278468205446,
"eval_loss": 0.9513992667198181,
"eval_runtime": 317.1699,
"eval_samples_per_second": 142.86,
"eval_steps_per_second": 8.929,
"step": 25000
},
{
"epoch": 0.5585240072235772,
"grad_norm": 4.287986755371094,
"learning_rate": 2.2084970118965616e-05,
"loss": 1.0379,
"step": 30000
},
{
"epoch": 0.5585240072235772,
"eval_accuracy": 0.8115293649487124,
"eval_loss": 0.9028974771499634,
"eval_runtime": 316.9066,
"eval_samples_per_second": 142.979,
"eval_steps_per_second": 8.936,
"step": 30000
},
{
"epoch": 0.65161134176084,
"grad_norm": 3.521850347518921,
"learning_rate": 1.743339601213859e-05,
"loss": 0.9956,
"step": 35000
},
{
"epoch": 0.65161134176084,
"eval_accuracy": 0.8174002465681974,
"eval_loss": 0.8695101737976074,
"eval_runtime": 316.9452,
"eval_samples_per_second": 142.962,
"eval_steps_per_second": 8.935,
"step": 35000
},
{
"epoch": 0.7446986762981029,
"grad_norm": 4.046538829803467,
"learning_rate": 1.2779960158620818e-05,
"loss": 0.9647,
"step": 40000
},
{
"epoch": 0.7446986762981029,
"eval_accuracy": 0.8216175421669631,
"eval_loss": 0.8461592793464661,
"eval_runtime": 318.3007,
"eval_samples_per_second": 142.353,
"eval_steps_per_second": 8.897,
"step": 40000
},
{
"epoch": 0.8377860108353657,
"grad_norm": 4.023233413696289,
"learning_rate": 8.12745517844842e-06,
"loss": 0.9351,
"step": 45000
},
{
"epoch": 0.8377860108353657,
"eval_accuracy": 0.8258444821249434,
"eval_loss": 0.8274036645889282,
"eval_runtime": 318.395,
"eval_samples_per_second": 142.311,
"eval_steps_per_second": 8.895,
"step": 45000
},
{
"epoch": 0.9308733453726286,
"grad_norm": 3.7155344486236572,
"learning_rate": 3.4749501982760224e-06,
"loss": 0.9194,
"step": 50000
},
{
"epoch": 0.9308733453726286,
"eval_accuracy": 0.8285950217539291,
"eval_loss": 0.8120360374450684,
"eval_runtime": 309.307,
"eval_samples_per_second": 146.492,
"eval_steps_per_second": 9.156,
"step": 50000
},
{
"epoch": 1.0,
"step": 53713,
"total_flos": 1.1339138340497818e+17,
"train_loss": 1.2134282816267128,
"train_runtime": 14742.2291,
"train_samples_per_second": 58.295,
"train_steps_per_second": 3.643
}
],
"logging_steps": 5000,
"max_steps": 53713,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1339138340497818e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}