Hanzalwi's picture
Training in progress, step 2200, checkpoint
e4702c6
raw
history blame
7.22 kB
{
"best_metric": 1.2910176515579224,
"best_model_checkpoint": "./outputs/checkpoint-2200",
"epoch": 2.9333333333333336,
"eval_steps": 100,
"global_step": 2200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 0.0002,
"loss": 1.855,
"step": 100
},
{
"epoch": 0.13,
"eval_loss": 1.4976259469985962,
"eval_runtime": 47.2737,
"eval_samples_per_second": 30.694,
"eval_steps_per_second": 3.85,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 0.0002,
"loss": 1.5891,
"step": 200
},
{
"epoch": 0.27,
"eval_loss": 1.4663879871368408,
"eval_runtime": 47.1865,
"eval_samples_per_second": 30.75,
"eval_steps_per_second": 3.857,
"step": 200
},
{
"epoch": 0.4,
"learning_rate": 0.0002,
"loss": 1.5693,
"step": 300
},
{
"epoch": 0.4,
"eval_loss": 1.4471981525421143,
"eval_runtime": 47.1683,
"eval_samples_per_second": 30.762,
"eval_steps_per_second": 3.859,
"step": 300
},
{
"epoch": 0.53,
"learning_rate": 0.0002,
"loss": 1.5495,
"step": 400
},
{
"epoch": 0.53,
"eval_loss": 1.4266287088394165,
"eval_runtime": 47.1866,
"eval_samples_per_second": 30.75,
"eval_steps_per_second": 3.857,
"step": 400
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 1.5354,
"step": 500
},
{
"epoch": 0.67,
"eval_loss": 1.410528540611267,
"eval_runtime": 47.2451,
"eval_samples_per_second": 30.712,
"eval_steps_per_second": 3.852,
"step": 500
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 1.5198,
"step": 600
},
{
"epoch": 0.8,
"eval_loss": 1.4007145166397095,
"eval_runtime": 46.9739,
"eval_samples_per_second": 30.89,
"eval_steps_per_second": 3.874,
"step": 600
},
{
"epoch": 0.93,
"learning_rate": 0.0002,
"loss": 1.5095,
"step": 700
},
{
"epoch": 0.93,
"eval_loss": 1.388436198234558,
"eval_runtime": 47.0302,
"eval_samples_per_second": 30.853,
"eval_steps_per_second": 3.87,
"step": 700
},
{
"epoch": 1.07,
"learning_rate": 0.0002,
"loss": 1.488,
"step": 800
},
{
"epoch": 1.07,
"eval_loss": 1.3770211935043335,
"eval_runtime": 47.1554,
"eval_samples_per_second": 30.771,
"eval_steps_per_second": 3.86,
"step": 800
},
{
"epoch": 1.2,
"learning_rate": 0.0002,
"loss": 1.4685,
"step": 900
},
{
"epoch": 1.2,
"eval_loss": 1.3670101165771484,
"eval_runtime": 47.4079,
"eval_samples_per_second": 30.607,
"eval_steps_per_second": 3.839,
"step": 900
},
{
"epoch": 1.33,
"learning_rate": 0.0002,
"loss": 1.4583,
"step": 1000
},
{
"epoch": 1.33,
"eval_loss": 1.3592472076416016,
"eval_runtime": 47.3317,
"eval_samples_per_second": 30.656,
"eval_steps_per_second": 3.845,
"step": 1000
},
{
"epoch": 1.47,
"learning_rate": 0.0002,
"loss": 1.4546,
"step": 1100
},
{
"epoch": 1.47,
"eval_loss": 1.3527010679244995,
"eval_runtime": 47.2776,
"eval_samples_per_second": 30.691,
"eval_steps_per_second": 3.85,
"step": 1100
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 1.4392,
"step": 1200
},
{
"epoch": 1.6,
"eval_loss": 1.3437373638153076,
"eval_runtime": 47.1936,
"eval_samples_per_second": 30.746,
"eval_steps_per_second": 3.856,
"step": 1200
},
{
"epoch": 1.73,
"learning_rate": 0.0002,
"loss": 1.45,
"step": 1300
},
{
"epoch": 1.73,
"eval_loss": 1.3367496728897095,
"eval_runtime": 47.2517,
"eval_samples_per_second": 30.708,
"eval_steps_per_second": 3.852,
"step": 1300
},
{
"epoch": 1.87,
"learning_rate": 0.0002,
"loss": 1.4321,
"step": 1400
},
{
"epoch": 1.87,
"eval_loss": 1.3307961225509644,
"eval_runtime": 47.1256,
"eval_samples_per_second": 30.79,
"eval_steps_per_second": 3.862,
"step": 1400
},
{
"epoch": 2.0,
"learning_rate": 0.0002,
"loss": 1.4336,
"step": 1500
},
{
"epoch": 2.0,
"eval_loss": 1.3263577222824097,
"eval_runtime": 47.1607,
"eval_samples_per_second": 30.767,
"eval_steps_per_second": 3.859,
"step": 1500
},
{
"epoch": 2.13,
"learning_rate": 0.0002,
"loss": 1.3981,
"step": 1600
},
{
"epoch": 2.13,
"eval_loss": 1.319887638092041,
"eval_runtime": 47.019,
"eval_samples_per_second": 30.86,
"eval_steps_per_second": 3.871,
"step": 1600
},
{
"epoch": 2.27,
"learning_rate": 0.0002,
"loss": 1.3969,
"step": 1700
},
{
"epoch": 2.27,
"eval_loss": 1.3168717622756958,
"eval_runtime": 47.2245,
"eval_samples_per_second": 30.726,
"eval_steps_per_second": 3.854,
"step": 1700
},
{
"epoch": 2.4,
"learning_rate": 0.0002,
"loss": 1.3862,
"step": 1800
},
{
"epoch": 2.4,
"eval_loss": 1.3101677894592285,
"eval_runtime": 47.1712,
"eval_samples_per_second": 30.76,
"eval_steps_per_second": 3.858,
"step": 1800
},
{
"epoch": 2.53,
"learning_rate": 0.0002,
"loss": 1.3863,
"step": 1900
},
{
"epoch": 2.53,
"eval_loss": 1.304863691329956,
"eval_runtime": 47.3226,
"eval_samples_per_second": 30.662,
"eval_steps_per_second": 3.846,
"step": 1900
},
{
"epoch": 2.67,
"learning_rate": 0.0002,
"loss": 1.3918,
"step": 2000
},
{
"epoch": 2.67,
"eval_loss": 1.2992783784866333,
"eval_runtime": 47.3036,
"eval_samples_per_second": 30.674,
"eval_steps_per_second": 3.847,
"step": 2000
},
{
"epoch": 2.8,
"learning_rate": 0.0002,
"loss": 1.3869,
"step": 2100
},
{
"epoch": 2.8,
"eval_loss": 1.2948063611984253,
"eval_runtime": 47.587,
"eval_samples_per_second": 30.492,
"eval_steps_per_second": 3.825,
"step": 2100
},
{
"epoch": 2.93,
"learning_rate": 0.0002,
"loss": 1.3818,
"step": 2200
},
{
"epoch": 2.93,
"eval_loss": 1.2910176515579224,
"eval_runtime": 47.3703,
"eval_samples_per_second": 30.631,
"eval_steps_per_second": 3.842,
"step": 2200
}
],
"logging_steps": 100,
"max_steps": 2250,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 7.376989486841856e+16,
"trial_name": null,
"trial_params": null
}