|
{ |
|
"best_metric": 1.9741289615631104, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.003188318002837603, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.5941590014188015e-05, |
|
"eval_loss": 2.3955931663513184, |
|
"eval_runtime": 1553.8147, |
|
"eval_samples_per_second": 16.999, |
|
"eval_steps_per_second": 4.25, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00015941590014188016, |
|
"grad_norm": 0.6692927479743958, |
|
"learning_rate": 4.0600000000000004e-05, |
|
"loss": 1.9118, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0003188318002837603, |
|
"grad_norm": 0.9186792969703674, |
|
"learning_rate": 8.120000000000001e-05, |
|
"loss": 1.8931, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00047824770042564045, |
|
"grad_norm": 1.1474906206130981, |
|
"learning_rate": 0.00012179999999999999, |
|
"loss": 1.9426, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0006376636005675206, |
|
"grad_norm": 1.8628828525543213, |
|
"learning_rate": 0.00016240000000000002, |
|
"loss": 1.9296, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0007970795007094007, |
|
"grad_norm": 6.320926189422607, |
|
"learning_rate": 0.000203, |
|
"loss": 2.0846, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0007970795007094007, |
|
"eval_loss": 1.9741289615631104, |
|
"eval_runtime": 1551.6609, |
|
"eval_samples_per_second": 17.022, |
|
"eval_steps_per_second": 4.256, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0009564954008512809, |
|
"grad_norm": 0.5567274689674377, |
|
"learning_rate": 0.00020275275110137215, |
|
"loss": 1.8676, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.001115911300993161, |
|
"grad_norm": 0.9768388867378235, |
|
"learning_rate": 0.00020201220897726938, |
|
"loss": 1.824, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0012753272011350413, |
|
"grad_norm": 0.8935255408287048, |
|
"learning_rate": 0.00020078198147448128, |
|
"loss": 1.8593, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0014347431012769214, |
|
"grad_norm": 1.3647874593734741, |
|
"learning_rate": 0.00019906806213773937, |
|
"loss": 2.0022, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0015941590014188014, |
|
"grad_norm": 9.45513916015625, |
|
"learning_rate": 0.0001968788010097697, |
|
"loss": 1.7834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0015941590014188014, |
|
"eval_loss": 2.133173704147339, |
|
"eval_runtime": 1552.7201, |
|
"eval_samples_per_second": 17.011, |
|
"eval_steps_per_second": 4.253, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0017535749015606817, |
|
"grad_norm": 0.7368646264076233, |
|
"learning_rate": 0.00019422486395072398, |
|
"loss": 1.7586, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0019129908017025618, |
|
"grad_norm": 0.8807580471038818, |
|
"learning_rate": 0.0001911191806751811, |
|
"loss": 1.764, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.002072406701844442, |
|
"grad_norm": 1.2745860815048218, |
|
"learning_rate": 0.00018757688175987723, |
|
"loss": 1.8485, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.002231822601986322, |
|
"grad_norm": 1.4981567859649658, |
|
"learning_rate": 0.00018361522492905716, |
|
"loss": 1.9636, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0023912385021282023, |
|
"grad_norm": 4.255372047424316, |
|
"learning_rate": 0.00017925351097657625, |
|
"loss": 2.0425, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0023912385021282023, |
|
"eval_loss": 1.9914931058883667, |
|
"eval_runtime": 1554.4671, |
|
"eval_samples_per_second": 16.992, |
|
"eval_steps_per_second": 4.248, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0025506544022700826, |
|
"grad_norm": 0.7145971655845642, |
|
"learning_rate": 0.00017451298973437308, |
|
"loss": 1.7097, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0027100703024119624, |
|
"grad_norm": 0.8129323124885559, |
|
"learning_rate": 0.0001694167565454241, |
|
"loss": 1.853, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0028694862025538427, |
|
"grad_norm": 1.0721731185913086, |
|
"learning_rate": 0.0001639896397455543, |
|
"loss": 1.8659, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.003028902102695723, |
|
"grad_norm": 1.490907907485962, |
|
"learning_rate": 0.0001582580797022808, |
|
"loss": 1.9599, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.003188318002837603, |
|
"grad_norm": 5.143823623657227, |
|
"learning_rate": 0.00015225, |
|
"loss": 1.9821, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.003188318002837603, |
|
"eval_loss": 2.058631658554077, |
|
"eval_runtime": 1553.3483, |
|
"eval_samples_per_second": 17.004, |
|
"eval_steps_per_second": 4.251, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.696910581891072e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|