lesso03's picture
Training in progress, step 200, checkpoint
899b172 verified
{
"best_metric": 1.9741289615631104,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.003188318002837603,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.5941590014188015e-05,
"eval_loss": 2.3955931663513184,
"eval_runtime": 1553.8147,
"eval_samples_per_second": 16.999,
"eval_steps_per_second": 4.25,
"step": 1
},
{
"epoch": 0.00015941590014188016,
"grad_norm": 0.6692927479743958,
"learning_rate": 4.0600000000000004e-05,
"loss": 1.9118,
"step": 10
},
{
"epoch": 0.0003188318002837603,
"grad_norm": 0.9186792969703674,
"learning_rate": 8.120000000000001e-05,
"loss": 1.8931,
"step": 20
},
{
"epoch": 0.00047824770042564045,
"grad_norm": 1.1474906206130981,
"learning_rate": 0.00012179999999999999,
"loss": 1.9426,
"step": 30
},
{
"epoch": 0.0006376636005675206,
"grad_norm": 1.8628828525543213,
"learning_rate": 0.00016240000000000002,
"loss": 1.9296,
"step": 40
},
{
"epoch": 0.0007970795007094007,
"grad_norm": 6.320926189422607,
"learning_rate": 0.000203,
"loss": 2.0846,
"step": 50
},
{
"epoch": 0.0007970795007094007,
"eval_loss": 1.9741289615631104,
"eval_runtime": 1551.6609,
"eval_samples_per_second": 17.022,
"eval_steps_per_second": 4.256,
"step": 50
},
{
"epoch": 0.0009564954008512809,
"grad_norm": 0.5567274689674377,
"learning_rate": 0.00020275275110137215,
"loss": 1.8676,
"step": 60
},
{
"epoch": 0.001115911300993161,
"grad_norm": 0.9768388867378235,
"learning_rate": 0.00020201220897726938,
"loss": 1.824,
"step": 70
},
{
"epoch": 0.0012753272011350413,
"grad_norm": 0.8935255408287048,
"learning_rate": 0.00020078198147448128,
"loss": 1.8593,
"step": 80
},
{
"epoch": 0.0014347431012769214,
"grad_norm": 1.3647874593734741,
"learning_rate": 0.00019906806213773937,
"loss": 2.0022,
"step": 90
},
{
"epoch": 0.0015941590014188014,
"grad_norm": 9.45513916015625,
"learning_rate": 0.0001968788010097697,
"loss": 1.7834,
"step": 100
},
{
"epoch": 0.0015941590014188014,
"eval_loss": 2.133173704147339,
"eval_runtime": 1552.7201,
"eval_samples_per_second": 17.011,
"eval_steps_per_second": 4.253,
"step": 100
},
{
"epoch": 0.0017535749015606817,
"grad_norm": 0.7368646264076233,
"learning_rate": 0.00019422486395072398,
"loss": 1.7586,
"step": 110
},
{
"epoch": 0.0019129908017025618,
"grad_norm": 0.8807580471038818,
"learning_rate": 0.0001911191806751811,
"loss": 1.764,
"step": 120
},
{
"epoch": 0.002072406701844442,
"grad_norm": 1.2745860815048218,
"learning_rate": 0.00018757688175987723,
"loss": 1.8485,
"step": 130
},
{
"epoch": 0.002231822601986322,
"grad_norm": 1.4981567859649658,
"learning_rate": 0.00018361522492905716,
"loss": 1.9636,
"step": 140
},
{
"epoch": 0.0023912385021282023,
"grad_norm": 4.255372047424316,
"learning_rate": 0.00017925351097657625,
"loss": 2.0425,
"step": 150
},
{
"epoch": 0.0023912385021282023,
"eval_loss": 1.9914931058883667,
"eval_runtime": 1554.4671,
"eval_samples_per_second": 16.992,
"eval_steps_per_second": 4.248,
"step": 150
},
{
"epoch": 0.0025506544022700826,
"grad_norm": 0.7145971655845642,
"learning_rate": 0.00017451298973437308,
"loss": 1.7097,
"step": 160
},
{
"epoch": 0.0027100703024119624,
"grad_norm": 0.8129323124885559,
"learning_rate": 0.0001694167565454241,
"loss": 1.853,
"step": 170
},
{
"epoch": 0.0028694862025538427,
"grad_norm": 1.0721731185913086,
"learning_rate": 0.0001639896397455543,
"loss": 1.8659,
"step": 180
},
{
"epoch": 0.003028902102695723,
"grad_norm": 1.490907907485962,
"learning_rate": 0.0001582580797022808,
"loss": 1.9599,
"step": 190
},
{
"epoch": 0.003188318002837603,
"grad_norm": 5.143823623657227,
"learning_rate": 0.00015225,
"loss": 1.9821,
"step": 200
},
{
"epoch": 0.003188318002837603,
"eval_loss": 2.058631658554077,
"eval_runtime": 1553.3483,
"eval_samples_per_second": 17.004,
"eval_steps_per_second": 4.251,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.696910581891072e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}