KevinKibe's picture
Training in progress, step 1400, checkpoint
8dc5179 verified
raw
history blame
3.43 kB
{
"best_metric": 0.37990298867225647,
"best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned-finetuned/checkpoint-600",
"epoch": 1400.0,
"eval_steps": 200,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 200.0,
"grad_norm": 0.19941186904907227,
"learning_rate": 1.8e-05,
"loss": 0.0469,
"step": 200
},
{
"epoch": 200.0,
"eval_gen_len": 20.0,
"eval_loss": 0.39197173714637756,
"eval_rouge": 0.0476,
"eval_runtime": 9.9932,
"eval_samples_per_second": 0.2,
"eval_steps_per_second": 0.1,
"step": 200
},
{
"epoch": 400.0,
"grad_norm": 0.08469633013010025,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0069,
"step": 400
},
{
"epoch": 400.0,
"eval_gen_len": 17.5,
"eval_loss": 0.3805849552154541,
"eval_rouge": 0.1364,
"eval_runtime": 1.1672,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.857,
"step": 400
},
{
"epoch": 600.0,
"grad_norm": 0.05082901939749718,
"learning_rate": 1.4e-05,
"loss": 0.0034,
"step": 600
},
{
"epoch": 600.0,
"eval_gen_len": 17.5,
"eval_loss": 0.37990298867225647,
"eval_rouge": 0.1364,
"eval_runtime": 1.2177,
"eval_samples_per_second": 1.642,
"eval_steps_per_second": 0.821,
"step": 600
},
{
"epoch": 800.0,
"grad_norm": 0.03562492877244949,
"learning_rate": 1.2e-05,
"loss": 0.0022,
"step": 800
},
{
"epoch": 800.0,
"eval_gen_len": 109.0,
"eval_loss": 0.39079928398132324,
"eval_rouge": 0.1364,
"eval_runtime": 10.1532,
"eval_samples_per_second": 0.197,
"eval_steps_per_second": 0.098,
"step": 800
},
{
"epoch": 1000.0,
"grad_norm": 0.027455538511276245,
"learning_rate": 1e-05,
"loss": 0.0016,
"step": 1000
},
{
"epoch": 1000.0,
"eval_gen_len": 13.5,
"eval_loss": 0.3873954713344574,
"eval_rouge": 0.1765,
"eval_runtime": 10.52,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.095,
"step": 1000
},
{
"epoch": 1200.0,
"grad_norm": 0.02252182736992836,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0013,
"step": 1200
},
{
"epoch": 1200.0,
"eval_gen_len": 13.5,
"eval_loss": 0.39042866230010986,
"eval_rouge": 0.1765,
"eval_runtime": 10.4367,
"eval_samples_per_second": 0.192,
"eval_steps_per_second": 0.096,
"step": 1200
},
{
"epoch": 1400.0,
"grad_norm": 0.019408540800213814,
"learning_rate": 6e-06,
"loss": 0.0011,
"step": 1400
},
{
"epoch": 1400.0,
"eval_gen_len": 13.5,
"eval_loss": 0.3919622004032135,
"eval_rouge": 0.1765,
"eval_runtime": 9.8993,
"eval_samples_per_second": 0.202,
"eval_steps_per_second": 0.101,
"step": 1400
}
],
"logging_steps": 200,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2000,
"save_steps": 200,
"total_flos": 4096215416832000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}