granite-3.0-8b-instruct-ft3 / loss_metrics.json
root
Initial Commit
171d73c
raw
history blame
5.7 kB
[
{
"step": 30,
"loss": 1.4062,
"type": "train",
"grad_norm": 8.482284545898438,
"learning_rate": 1.8347107438016528e-06,
"epoch": 0.2475502836513667
},
{
"step": 30,
"loss": 1.1563361883163452,
"type": "eval",
"eval_runtime": 238.6436,
"eval_samples_per_second": 2.891,
"eval_steps_per_second": 2.891,
"epoch": 0.2475502836513667
},
{
"step": 60,
"loss": 1.1662,
"type": "train",
"grad_norm": 6.751504898071289,
"learning_rate": 1.6694214876033058e-06,
"epoch": 0.4951005673027334
},
{
"step": 60,
"loss": 1.0914983749389648,
"type": "eval",
"eval_runtime": 248.6513,
"eval_samples_per_second": 2.775,
"eval_steps_per_second": 2.775,
"epoch": 0.4951005673027334
},
{
"step": 90,
"loss": 1.1109,
"type": "train",
"grad_norm": 4.135236740112305,
"learning_rate": 1.5041322314049587e-06,
"epoch": 0.7426508509541001
},
{
"step": 90,
"loss": 1.0496803522109985,
"type": "eval",
"eval_runtime": 239.5109,
"eval_samples_per_second": 2.881,
"eval_steps_per_second": 2.881,
"epoch": 0.7426508509541001
},
{
"step": 120,
"loss": 1.0639,
"type": "train",
"grad_norm": 5.695450782775879,
"learning_rate": 1.3388429752066116e-06,
"epoch": 0.9902011346054668
},
{
"step": 120,
"loss": 1.0101323127746582,
"type": "eval",
"eval_runtime": 234.2087,
"eval_samples_per_second": 2.946,
"eval_steps_per_second": 2.946,
"epoch": 0.9902011346054668
},
{
"step": 150,
"loss": 0.9416,
"type": "train",
"grad_norm": 7.571918487548828,
"learning_rate": 1.1735537190082645e-06,
"epoch": 1.2377514182568334
},
{
"step": 150,
"loss": 1.0040597915649414,
"type": "eval",
"eval_runtime": 247.1746,
"eval_samples_per_second": 2.792,
"eval_steps_per_second": 2.792,
"epoch": 1.2377514182568334
},
{
"step": 180,
"loss": 0.925,
"type": "train",
"grad_norm": 4.53548002243042,
"learning_rate": 1.0082644628099172e-06,
"epoch": 1.4853017019082002
},
{
"step": 180,
"loss": 0.9938931465148926,
"type": "eval",
"eval_runtime": 234.8252,
"eval_samples_per_second": 2.938,
"eval_steps_per_second": 2.938,
"epoch": 1.4853017019082002
},
{
"step": 210,
"loss": 0.9033,
"type": "train",
"grad_norm": 5.361794948577881,
"learning_rate": 8.429752066115701e-07,
"epoch": 1.7328519855595668
},
{
"step": 210,
"loss": 0.9868502020835876,
"type": "eval",
"eval_runtime": 234.8772,
"eval_samples_per_second": 2.938,
"eval_steps_per_second": 2.938,
"epoch": 1.7328519855595668
},
{
"step": 240,
"loss": 0.9087,
"type": "train",
"grad_norm": 4.525313377380371,
"learning_rate": 6.776859504132231e-07,
"epoch": 1.9804022692109333
},
{
"step": 240,
"loss": 0.9824326634407043,
"type": "eval",
"eval_runtime": 234.8918,
"eval_samples_per_second": 2.938,
"eval_steps_per_second": 2.938,
"epoch": 1.9804022692109333
},
{
"step": 270,
"loss": 0.8166,
"type": "train",
"grad_norm": 4.654973030090332,
"learning_rate": 5.12396694214876e-07,
"epoch": 2.2279525528623
},
{
"step": 270,
"loss": 0.9874295592308044,
"type": "eval",
"eval_runtime": 243.7953,
"eval_samples_per_second": 2.83,
"eval_steps_per_second": 2.83,
"epoch": 2.2279525528623
},
{
"step": 300,
"loss": 0.8226,
"type": "train",
"grad_norm": 5.9346442222595215,
"learning_rate": 3.471074380165289e-07,
"epoch": 2.475502836513667
},
{
"step": 300,
"loss": 0.9854046106338501,
"type": "eval",
"eval_runtime": 243.3847,
"eval_samples_per_second": 2.835,
"eval_steps_per_second": 2.835,
"epoch": 2.475502836513667
},
{
"step": 330,
"loss": 0.8289,
"type": "train",
"grad_norm": 4.637845516204834,
"learning_rate": 1.818181818181818e-07,
"epoch": 2.7230531201650336
},
{
"step": 330,
"loss": 0.9841367602348328,
"type": "eval",
"eval_runtime": 242.6797,
"eval_samples_per_second": 2.843,
"eval_steps_per_second": 2.843,
"epoch": 2.7230531201650336
},
{
"step": 360,
"loss": 0.8137,
"type": "train",
"grad_norm": 4.132049560546875,
"learning_rate": 1.652892561983471e-08,
"epoch": 2.9706034038164004
},
{
"step": 360,
"loss": 0.9834251403808594,
"type": "eval",
"eval_runtime": 242.7445,
"eval_samples_per_second": 2.842,
"eval_steps_per_second": 2.842,
"epoch": 2.9706034038164004
},
{
"step": 363,
"train_runtime": 22924.4691,
"train_samples_per_second": 0.507,
"train_steps_per_second": 0.016,
"total_flos": 7877706776576.0,
"train_loss": 0.9745982139892158,
"epoch": 2.9953584321815367
}
]