|
{ |
|
"best_metric": 0.37990298867225647, |
|
"best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned-finetuned/checkpoint-600", |
|
"epoch": 1400.0, |
|
"eval_steps": 200, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 0.19941186904907227, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_gen_len": 20.0, |
|
"eval_loss": 0.39197173714637756, |
|
"eval_rouge": 0.0476, |
|
"eval_runtime": 9.9932, |
|
"eval_samples_per_second": 0.2, |
|
"eval_steps_per_second": 0.1, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"grad_norm": 0.08469633013010025, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0069, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_gen_len": 17.5, |
|
"eval_loss": 0.3805849552154541, |
|
"eval_rouge": 0.1364, |
|
"eval_runtime": 1.1672, |
|
"eval_samples_per_second": 1.713, |
|
"eval_steps_per_second": 0.857, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 600.0, |
|
"grad_norm": 0.05082901939749718, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.0034, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 600.0, |
|
"eval_gen_len": 17.5, |
|
"eval_loss": 0.37990298867225647, |
|
"eval_rouge": 0.1364, |
|
"eval_runtime": 1.2177, |
|
"eval_samples_per_second": 1.642, |
|
"eval_steps_per_second": 0.821, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 800.0, |
|
"grad_norm": 0.03562492877244949, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0022, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 800.0, |
|
"eval_gen_len": 109.0, |
|
"eval_loss": 0.39079928398132324, |
|
"eval_rouge": 0.1364, |
|
"eval_runtime": 10.1532, |
|
"eval_samples_per_second": 0.197, |
|
"eval_steps_per_second": 0.098, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"grad_norm": 0.027455538511276245, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0016, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1000.0, |
|
"eval_gen_len": 13.5, |
|
"eval_loss": 0.3873954713344574, |
|
"eval_rouge": 0.1765, |
|
"eval_runtime": 10.52, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1200.0, |
|
"grad_norm": 0.02252182736992836, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0013, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1200.0, |
|
"eval_gen_len": 13.5, |
|
"eval_loss": 0.39042866230010986, |
|
"eval_rouge": 0.1765, |
|
"eval_runtime": 10.4367, |
|
"eval_samples_per_second": 0.192, |
|
"eval_steps_per_second": 0.096, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1400.0, |
|
"grad_norm": 0.019408540800213814, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0011, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1400.0, |
|
"eval_gen_len": 13.5, |
|
"eval_loss": 0.3919622004032135, |
|
"eval_rouge": 0.1765, |
|
"eval_runtime": 9.8993, |
|
"eval_samples_per_second": 0.202, |
|
"eval_steps_per_second": 0.101, |
|
"step": 1400 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2000, |
|
"save_steps": 200, |
|
"total_flos": 4096215416832000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|