{
"best_metric": 0.005579269025474787,
"best_model_checkpoint": "miner_id_24/checkpoint-200",
"epoch": 0.042180744490140254,
"eval_steps": 100,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00021090372245070126,
"eval_loss": 0.9089367985725403,
"eval_runtime": 708.2475,
"eval_samples_per_second": 2.82,
"eval_steps_per_second": 1.411,
"step": 1
},
{
"epoch": 0.0021090372245070126,
"grad_norm": 14.0634126663208,
"learning_rate": 1e-05,
"loss": 1.7235,
"step": 10
},
{
"epoch": 0.004218074449014025,
"grad_norm": 26.957786560058594,
"learning_rate": 2e-05,
"loss": 0.059,
"step": 20
},
{
"epoch": 0.006327111673521037,
"grad_norm": 0.11090046167373657,
"learning_rate": 3e-05,
"loss": 0.0071,
"step": 30
},
{
"epoch": 0.00843614889802805,
"grad_norm": 0.15285538136959076,
"learning_rate": 4e-05,
"loss": 0.0386,
"step": 40
},
{
"epoch": 0.010545186122535063,
"grad_norm": 0.04513653367757797,
"learning_rate": 5e-05,
"loss": 0.0078,
"step": 50
},
{
"epoch": 0.012654223347042075,
"grad_norm": 42.402244567871094,
"learning_rate": 6e-05,
"loss": 0.0887,
"step": 60
},
{
"epoch": 0.014763260571549088,
"grad_norm": 68.10028839111328,
"learning_rate": 7e-05,
"loss": 0.0061,
"step": 70
},
{
"epoch": 0.0168722977960561,
"grad_norm": 5.722660541534424,
"learning_rate": 8e-05,
"loss": 0.1373,
"step": 80
},
{
"epoch": 0.018981335020563112,
"grad_norm": 17.748676300048828,
"learning_rate": 9e-05,
"loss": 0.3995,
"step": 90
},
{
"epoch": 0.021090372245070127,
"grad_norm": 5.94190788269043,
"learning_rate": 0.0001,
"loss": 0.1634,
"step": 100
},
{
"epoch": 0.021090372245070127,
"eval_loss": 0.035220917314291,
"eval_runtime": 707.5102,
"eval_samples_per_second": 2.823,
"eval_steps_per_second": 1.412,
"step": 100
},
{
"epoch": 0.02319940946957714,
"grad_norm": 62.70212173461914,
"learning_rate": 9.755282581475769e-05,
"loss": 0.0896,
"step": 110
},
{
"epoch": 0.02530844669408415,
"grad_norm": 0.842799723148346,
"learning_rate": 9.045084971874738e-05,
"loss": 0.1349,
"step": 120
},
{
"epoch": 0.027417483918591164,
"grad_norm": 166.96304321289062,
"learning_rate": 7.938926261462366e-05,
"loss": 0.2701,
"step": 130
},
{
"epoch": 0.029526521143098176,
"grad_norm": 17.788463592529297,
"learning_rate": 6.545084971874738e-05,
"loss": 0.1667,
"step": 140
},
{
"epoch": 0.03163555836760519,
"grad_norm": 1.1575299501419067,
"learning_rate": 5e-05,
"loss": 0.1699,
"step": 150
},
{
"epoch": 0.0337445955921122,
"grad_norm": 12.75675106048584,
"learning_rate": 3.4549150281252636e-05,
"loss": 0.0575,
"step": 160
},
{
"epoch": 0.03585363281661921,
"grad_norm": 0.6250290870666504,
"learning_rate": 2.061073738537635e-05,
"loss": 0.0638,
"step": 170
},
{
"epoch": 0.037962670041126224,
"grad_norm": 0.22166423499584198,
"learning_rate": 9.549150281252633e-06,
"loss": 0.0095,
"step": 180
},
{
"epoch": 0.040071707265633236,
"grad_norm": 0.012423542328178883,
"learning_rate": 2.4471741852423237e-06,
"loss": 0.013,
"step": 190
},
{
"epoch": 0.042180744490140254,
"grad_norm": 0.02760171703994274,
"learning_rate": 0.0,
"loss": 0.063,
"step": 200
},
{
"epoch": 0.042180744490140254,
"eval_loss": 0.005579269025474787,
"eval_runtime": 707.9758,
"eval_samples_per_second": 2.821,
"eval_steps_per_second": 1.411,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.463984163127296e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
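
The learning_rate values in log_history rise linearly to a peak of 1e-4 over the first 100 steps and then decay to 0 at max_steps=200 along a cosine curve. The scheduler type itself is not stored in trainer_state.json, so the following is a minimal sketch under that assumption: it reproduces the logged values with transformers' get_cosine_schedule_with_warmup against a placeholder optimizer, not the actual training setup behind this checkpoint.

```python
# Sketch (not part of this checkpoint): reproduce the learning_rate column in
# log_history, assuming linear warmup for the first 100 steps to a peak of
# 1e-4 followed by cosine decay to 0 at max_steps=200.
import torch
from transformers import get_cosine_schedule_with_warmup

peak_lr, warmup_steps, max_steps = 1e-4, 100, 200

# Placeholder parameter/optimizer; only the schedule values are of interest here.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=peak_lr)
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_steps, num_training_steps=max_steps
)

for step in range(1, max_steps + 1):
    optimizer.step()   # no-op here (no gradients); keeps the step order PyTorch expects
    scheduler.step()
    if step % 10 == 0:
        # e.g. step 110 should print ~9.755282581475769e-05, matching the log above
        print(step, scheduler.get_last_lr()[0])
```

Printed values at steps 10, 100, 110, and 200 should line up with the corresponding log_history entries above (1e-05, 0.0001, 9.755282581475769e-05, and 0.0).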