model / run-1 /checkpoint-250 /trainer_state.json
Zlovoblachko's picture
End of training
da94b45 verified
raw
history blame
7.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 25,
"global_step": 250,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 219.40835571289062,
"learning_rate": 0.00018801838248448536,
"loss": 115.2915,
"step": 10
},
{
"epoch": 0.08,
"grad_norm": 62.35649108886719,
"learning_rate": 0.00018018428321429848,
"loss": 19.8096,
"step": 20
},
{
"epoch": 0.1,
"eval_loss": 2.5786311626434326,
"eval_meteor": 0.0,
"eval_rouge": 0.0,
"eval_runtime": 185.8184,
"eval_samples_per_second": 1.345,
"eval_steps_per_second": 1.345,
"step": 25
},
{
"epoch": 0.12,
"grad_norm": 76.14645385742188,
"learning_rate": 0.00017235018394411159,
"loss": 9.7015,
"step": 30
},
{
"epoch": 0.16,
"grad_norm": 6.961805820465088,
"learning_rate": 0.00016451608467392469,
"loss": 2.9942,
"step": 40
},
{
"epoch": 0.2,
"grad_norm": 43.01859664916992,
"learning_rate": 0.00015668198540373781,
"loss": 1.427,
"step": 50
},
{
"epoch": 0.2,
"eval_loss": 0.28364765644073486,
"eval_meteor": 0.02273246229783531,
"eval_rouge": 0.03521551652763062,
"eval_runtime": 185.2799,
"eval_samples_per_second": 1.349,
"eval_steps_per_second": 1.349,
"step": 50
},
{
"epoch": 0.24,
"grad_norm": 1.4709396362304688,
"learning_rate": 0.00014884788613355092,
"loss": 0.9872,
"step": 60
},
{
"epoch": 0.28,
"grad_norm": 1.0122220516204834,
"learning_rate": 0.00014101378686336402,
"loss": 0.7634,
"step": 70
},
{
"epoch": 0.3,
"eval_loss": 0.14952343702316284,
"eval_meteor": 0.07689507069769044,
"eval_rouge": 0.10951199778067923,
"eval_runtime": 183.0423,
"eval_samples_per_second": 1.366,
"eval_steps_per_second": 1.366,
"step": 75
},
{
"epoch": 0.32,
"grad_norm": 20.24289321899414,
"learning_rate": 0.00013317968759317715,
"loss": 0.6099,
"step": 80
},
{
"epoch": 0.36,
"grad_norm": 0.6482983827590942,
"learning_rate": 0.00012534558832299025,
"loss": 0.5676,
"step": 90
},
{
"epoch": 0.4,
"grad_norm": 1.0459566116333008,
"learning_rate": 0.00011751148905280335,
"loss": 0.5225,
"step": 100
},
{
"epoch": 0.4,
"eval_loss": 0.12942516803741455,
"eval_meteor": 0.09042579962176768,
"eval_rouge": 0.1346603229762297,
"eval_runtime": 183.3098,
"eval_samples_per_second": 1.364,
"eval_steps_per_second": 1.364,
"step": 100
},
{
"epoch": 0.44,
"grad_norm": 0.8052979111671448,
"learning_rate": 0.00010967738978261648,
"loss": 0.515,
"step": 110
},
{
"epoch": 0.48,
"grad_norm": 0.8996737599372864,
"learning_rate": 0.00010184329051242958,
"loss": 0.5177,
"step": 120
},
{
"epoch": 0.5,
"eval_loss": 0.12226101756095886,
"eval_meteor": 0.10434284222368143,
"eval_rouge": 0.15413964328066987,
"eval_runtime": 184.6588,
"eval_samples_per_second": 1.354,
"eval_steps_per_second": 1.354,
"step": 125
},
{
"epoch": 0.52,
"grad_norm": 0.6967951655387878,
"learning_rate": 9.400919124224268e-05,
"loss": 0.5249,
"step": 130
},
{
"epoch": 0.56,
"grad_norm": 0.5539537072181702,
"learning_rate": 8.617509197205579e-05,
"loss": 0.4336,
"step": 140
},
{
"epoch": 0.6,
"grad_norm": 0.6934689283370972,
"learning_rate": 7.834099270186891e-05,
"loss": 0.4581,
"step": 150
},
{
"epoch": 0.6,
"eval_loss": 0.12093699723482132,
"eval_meteor": 0.11141702521015225,
"eval_rouge": 0.16876141956675836,
"eval_runtime": 184.3427,
"eval_samples_per_second": 1.356,
"eval_steps_per_second": 1.356,
"step": 150
},
{
"epoch": 0.64,
"grad_norm": 3.469697952270508,
"learning_rate": 7.050689343168201e-05,
"loss": 0.4494,
"step": 160
},
{
"epoch": 0.68,
"grad_norm": 1.0301263332366943,
"learning_rate": 6.267279416149512e-05,
"loss": 0.4498,
"step": 170
},
{
"epoch": 0.7,
"eval_loss": 0.11946863681077957,
"eval_meteor": 0.11051272177679479,
"eval_rouge": 0.16635535722733613,
"eval_runtime": 185.0704,
"eval_samples_per_second": 1.351,
"eval_steps_per_second": 1.351,
"step": 175
},
{
"epoch": 0.72,
"grad_norm": 0.7396179437637329,
"learning_rate": 5.483869489130824e-05,
"loss": 0.7089,
"step": 180
},
{
"epoch": 0.76,
"grad_norm": 1.0098114013671875,
"learning_rate": 4.700459562112134e-05,
"loss": 0.4616,
"step": 190
},
{
"epoch": 0.8,
"grad_norm": 119.26329803466797,
"learning_rate": 3.9170496350934454e-05,
"loss": 0.4699,
"step": 200
},
{
"epoch": 0.8,
"eval_loss": 0.11950699985027313,
"eval_meteor": 0.11040707687984884,
"eval_rouge": 0.16702139582886516,
"eval_runtime": 185.055,
"eval_samples_per_second": 1.351,
"eval_steps_per_second": 1.351,
"step": 200
},
{
"epoch": 0.84,
"grad_norm": 0.8761780858039856,
"learning_rate": 3.133639708074756e-05,
"loss": 0.9796,
"step": 210
},
{
"epoch": 0.88,
"grad_norm": 0.6994351148605347,
"learning_rate": 2.350229781056067e-05,
"loss": 0.4515,
"step": 220
},
{
"epoch": 0.9,
"eval_loss": 0.1189429759979248,
"eval_meteor": 0.11115742614650127,
"eval_rouge": 0.17056126666712712,
"eval_runtime": 186.9575,
"eval_samples_per_second": 1.337,
"eval_steps_per_second": 1.337,
"step": 225
},
{
"epoch": 0.92,
"grad_norm": 0.6938163638114929,
"learning_rate": 1.566819854037378e-05,
"loss": 0.4165,
"step": 230
},
{
"epoch": 0.96,
"grad_norm": 0.7089868783950806,
"learning_rate": 7.83409927018689e-06,
"loss": 0.4554,
"step": 240
},
{
"epoch": 1.0,
"grad_norm": 0.742696225643158,
"learning_rate": 0.0,
"loss": 0.461,
"step": 250
},
{
"epoch": 1.0,
"eval_loss": 0.11835635453462601,
"eval_meteor": 0.11444057098479471,
"eval_rouge": 0.1729760215295089,
"eval_runtime": 187.724,
"eval_samples_per_second": 1.332,
"eval_steps_per_second": 1.332,
"step": 250
}
],
"logging_steps": 10,
"max_steps": 250,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1027178496000000.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": {
"gradient_accumulation_steps": 3,
"learning_rate": 0.00019585248175467225,
"num_train_epochs": 1
}
}