|
{ |
|
"best_metric": 0.8285950217539291, |
|
"best_model_checkpoint": "/content/temp_assamese/checkpoint-50000", |
|
"epoch": 1.0, |
|
"eval_steps": 5000, |
|
"global_step": 53713, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09308733453726287, |
|
"grad_norm": 5.457111358642578, |
|
"learning_rate": 4.5348425893172976e-05, |
|
"loss": 2.4466, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.09308733453726287, |
|
"eval_accuracy": 0.707468341537135, |
|
"eval_loss": 1.5004358291625977, |
|
"eval_runtime": 308.6127, |
|
"eval_samples_per_second": 146.822, |
|
"eval_steps_per_second": 9.177, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.18617466907452573, |
|
"grad_norm": 4.776674270629883, |
|
"learning_rate": 4.0694990039655205e-05, |
|
"loss": 1.4994, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.18617466907452573, |
|
"eval_accuracy": 0.7532453087820641, |
|
"eval_loss": 1.2256046533584595, |
|
"eval_runtime": 306.864, |
|
"eval_samples_per_second": 147.658, |
|
"eval_steps_per_second": 9.229, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2792620036117886, |
|
"grad_norm": 3.9309194087982178, |
|
"learning_rate": 3.604248505948281e-05, |
|
"loss": 1.2888, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.2792620036117886, |
|
"eval_accuracy": 0.7765668816186476, |
|
"eval_loss": 1.099416732788086, |
|
"eval_runtime": 321.8088, |
|
"eval_samples_per_second": 140.801, |
|
"eval_steps_per_second": 8.8, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.37234933814905147, |
|
"grad_norm": 3.988945722579956, |
|
"learning_rate": 3.1389980079310413e-05, |
|
"loss": 1.1746, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.37234933814905147, |
|
"eval_accuracy": 0.791549800931217, |
|
"eval_loss": 1.0090231895446777, |
|
"eval_runtime": 316.5344, |
|
"eval_samples_per_second": 143.147, |
|
"eval_steps_per_second": 8.947, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.4654366726863143, |
|
"grad_norm": 4.230010509490967, |
|
"learning_rate": 2.6736544225792642e-05, |
|
"loss": 1.0994, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4654366726863143, |
|
"eval_accuracy": 0.8021278468205446, |
|
"eval_loss": 0.9513992667198181, |
|
"eval_runtime": 317.1699, |
|
"eval_samples_per_second": 142.86, |
|
"eval_steps_per_second": 8.929, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.5585240072235772, |
|
"grad_norm": 4.287986755371094, |
|
"learning_rate": 2.2084970118965616e-05, |
|
"loss": 1.0379, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.5585240072235772, |
|
"eval_accuracy": 0.8115293649487124, |
|
"eval_loss": 0.9028974771499634, |
|
"eval_runtime": 316.9066, |
|
"eval_samples_per_second": 142.979, |
|
"eval_steps_per_second": 8.936, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.65161134176084, |
|
"grad_norm": 3.521850347518921, |
|
"learning_rate": 1.743339601213859e-05, |
|
"loss": 0.9956, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.65161134176084, |
|
"eval_accuracy": 0.8174002465681974, |
|
"eval_loss": 0.8695101737976074, |
|
"eval_runtime": 316.9452, |
|
"eval_samples_per_second": 142.962, |
|
"eval_steps_per_second": 8.935, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.7446986762981029, |
|
"grad_norm": 4.046538829803467, |
|
"learning_rate": 1.2779960158620818e-05, |
|
"loss": 0.9647, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.7446986762981029, |
|
"eval_accuracy": 0.8216175421669631, |
|
"eval_loss": 0.8461592793464661, |
|
"eval_runtime": 318.3007, |
|
"eval_samples_per_second": 142.353, |
|
"eval_steps_per_second": 8.897, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.8377860108353657, |
|
"grad_norm": 4.023233413696289, |
|
"learning_rate": 8.12745517844842e-06, |
|
"loss": 0.9351, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.8377860108353657, |
|
"eval_accuracy": 0.8258444821249434, |
|
"eval_loss": 0.8274036645889282, |
|
"eval_runtime": 318.395, |
|
"eval_samples_per_second": 142.311, |
|
"eval_steps_per_second": 8.895, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.9308733453726286, |
|
"grad_norm": 3.7155344486236572, |
|
"learning_rate": 3.4749501982760224e-06, |
|
"loss": 0.9194, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.9308733453726286, |
|
"eval_accuracy": 0.8285950217539291, |
|
"eval_loss": 0.8120360374450684, |
|
"eval_runtime": 309.307, |
|
"eval_samples_per_second": 146.492, |
|
"eval_steps_per_second": 9.156, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 53713, |
|
"total_flos": 1.1339138340497818e+17, |
|
"train_loss": 1.2134282816267128, |
|
"train_runtime": 14742.2291, |
|
"train_samples_per_second": 58.295, |
|
"train_steps_per_second": 3.643 |
|
} |
|
], |
|
"logging_steps": 5000, |
|
"max_steps": 53713, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1339138340497818e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|