food-recipe-generation / trainer_state.json
Shresthadev403's picture
End of training
18d986f
raw
history blame
4.79 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.358400662937642,
"eval_steps": 50000,
"global_step": 650000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8,
"learning_rate": 4.9920355054102726e-05,
"loss": 1.1412,
"step": 50000
},
{
"epoch": 0.8,
"eval_loss": 1.011365294456482,
"eval_runtime": 2006.5534,
"eval_samples_per_second": 111.193,
"eval_steps_per_second": 1.738,
"step": 50000
},
{
"epoch": 1.59,
"learning_rate": 4.992035346050262e-05,
"loss": 1.0123,
"step": 100000
},
{
"epoch": 1.59,
"eval_loss": 0.9693423509597778,
"eval_runtime": 1995.4615,
"eval_samples_per_second": 111.811,
"eval_steps_per_second": 1.747,
"step": 100000
},
{
"epoch": 2.39,
"learning_rate": 4.992034708610221e-05,
"loss": 0.9754,
"step": 150000
},
{
"epoch": 2.39,
"eval_loss": 0.9472731351852417,
"eval_runtime": 1997.2157,
"eval_samples_per_second": 111.713,
"eval_steps_per_second": 1.746,
"step": 150000
},
{
"epoch": 3.19,
"learning_rate": 4.992034549250211e-05,
"loss": 0.9539,
"step": 200000
},
{
"epoch": 3.19,
"eval_loss": 0.9325647354125977,
"eval_runtime": 2015.1208,
"eval_samples_per_second": 110.72,
"eval_steps_per_second": 1.73,
"step": 200000
},
{
"epoch": 3.98,
"learning_rate": 4.992035027330242e-05,
"loss": 0.9387,
"step": 250000
},
{
"epoch": 3.98,
"eval_loss": 0.9212433099746704,
"eval_runtime": 2011.4269,
"eval_samples_per_second": 110.924,
"eval_steps_per_second": 1.734,
"step": 250000
},
{
"epoch": 4.78,
"learning_rate": 4.992035027330242e-05,
"loss": 0.9243,
"step": 300000
},
{
"epoch": 4.78,
"eval_loss": 0.9138051271438599,
"eval_runtime": 2011.3973,
"eval_samples_per_second": 110.925,
"eval_steps_per_second": 1.734,
"step": 300000
},
{
"epoch": 5.58,
"learning_rate": 4.992035186690252e-05,
"loss": 0.9144,
"step": 350000
},
{
"epoch": 5.58,
"eval_loss": 0.9093130826950073,
"eval_runtime": 1998.4573,
"eval_samples_per_second": 111.644,
"eval_steps_per_second": 1.745,
"step": 350000
},
{
"epoch": 6.37,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.906,
"step": 400000
},
{
"epoch": 6.37,
"eval_loss": 0.9041373133659363,
"eval_runtime": 1998.7351,
"eval_samples_per_second": 111.628,
"eval_steps_per_second": 1.745,
"step": 400000
},
{
"epoch": 7.17,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.8994,
"step": 450000
},
{
"epoch": 7.17,
"eval_loss": 0.9003444910049438,
"eval_runtime": 1982.6092,
"eval_samples_per_second": 112.536,
"eval_steps_per_second": 1.759,
"step": 450000
},
{
"epoch": 7.97,
"learning_rate": 4.992035186690252e-05,
"loss": 0.8933,
"step": 500000
},
{
"epoch": 7.97,
"eval_loss": 0.8956149220466614,
"eval_runtime": 2002.7479,
"eval_samples_per_second": 111.404,
"eval_steps_per_second": 1.741,
"step": 500000
},
{
"epoch": 8.76,
"learning_rate": 4.9920355054102726e-05,
"loss": 0.8856,
"step": 550000
},
{
"epoch": 8.76,
"eval_loss": 0.8930546045303345,
"eval_runtime": 1996.5839,
"eval_samples_per_second": 111.748,
"eval_steps_per_second": 1.746,
"step": 550000
},
{
"epoch": 9.56,
"learning_rate": 4.992035346050262e-05,
"loss": 0.8802,
"step": 600000
},
{
"epoch": 9.56,
"eval_loss": 0.89084392786026,
"eval_runtime": 1991.4984,
"eval_samples_per_second": 112.034,
"eval_steps_per_second": 1.751,
"step": 600000
},
{
"epoch": 10.36,
"learning_rate": 4.992035346050262e-05,
"loss": 0.8763,
"step": 650000
},
{
"epoch": 10.36,
"eval_loss": 0.8895950317382812,
"eval_runtime": 2006.0064,
"eval_samples_per_second": 111.223,
"eval_steps_per_second": 1.738,
"step": 650000
}
],
"logging_steps": 50000,
"max_steps": 31375500,
"num_input_tokens_seen": 0,
"num_train_epochs": 500,
"save_steps": 50000,
"total_flos": 1.3587153002496e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}