File size: 2,384 Bytes
fe6f2b3 c5c3a79 fe6f2b3 c5c3a79 fe6f2b3 534a162 fb19a6f c5c3a79 fb19a6f 534a162 fb19a6f c5c3a79 fb19a6f 534a162 fb19a6f c5c3a79 fb19a6f 534a162 fb19a6f c5c3a79 fb19a6f 534a162 c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f c5c3a79 fb19a6f 534a162 c5c3a79 fe6f2b3 c5c3a79 fe6f2b3 fb19a6f 534a162 fe6f2b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4854368932038835,
"grad_norm": 1.9190031290054321,
"learning_rate": 8.794946550048592e-05,
"loss": 0.6452,
"step": 250
},
{
"epoch": 0.970873786407767,
"grad_norm": 1.3683266639709473,
"learning_rate": 7.580174927113704e-05,
"loss": 0.3863,
"step": 500
},
{
"epoch": 1.4563106796116505,
"grad_norm": 1.4516526460647583,
"learning_rate": 6.365403304178815e-05,
"loss": 0.3609,
"step": 750
},
{
"epoch": 1.941747572815534,
"grad_norm": 1.76002836227417,
"learning_rate": 5.150631681243926e-05,
"loss": 0.3382,
"step": 1000
},
{
"epoch": 2.4271844660194173,
"grad_norm": 1.7399638891220093,
"learning_rate": 3.9358600583090386e-05,
"loss": 0.3105,
"step": 1250
},
{
"epoch": 2.912621359223301,
"grad_norm": 1.924517035484314,
"learning_rate": 2.72108843537415e-05,
"loss": 0.2913,
"step": 1500
},
{
"epoch": 3.3980582524271843,
"grad_norm": 1.8956573009490967,
"learning_rate": 1.5063168124392615e-05,
"loss": 0.2694,
"step": 1750
},
{
"epoch": 3.883495145631068,
"grad_norm": 1.7733403444290161,
"learning_rate": 2.915451895043732e-06,
"loss": 0.2591,
"step": 2000
},
{
"epoch": 4.0,
"step": 2060,
"total_flos": 4.996544770678579e+17,
"train_loss": 0.35469038972576844,
"train_runtime": 73403.6809,
"train_samples_per_second": 7.184,
"train_steps_per_second": 0.028
}
],
"logging_steps": 250,
"max_steps": 2060,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.996544770678579e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|