{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 1.031325101852417,
"learning_rate": 0.0002,
"loss": 0.8838,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.0007693767547607,
"learning_rate": 0.0002,
"loss": 0.4622,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 2.0547432899475098,
"learning_rate": 0.0002,
"loss": 0.3032,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.6284219026565552,
"learning_rate": 0.0002,
"loss": 0.2312,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 1.0567408800125122,
"learning_rate": 0.0002,
"loss": 0.1959,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.54230135679245,
"learning_rate": 0.0002,
"loss": 0.1746,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.103034997322547e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}