File size: 2,301 Bytes
efe839e e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e 62d65b0 c4316da e5702b5 efe839e e5702b5 62d65b0 c4316da efe839e e5702b5 efe839e 62d65b0 efe839e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6936,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 4.639561707035756e-05,
"loss": 0.8105,
"step": 500
},
{
"epoch": 0.43,
"learning_rate": 4.2791234140715114e-05,
"loss": 0.5111,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 3.9186851211072664e-05,
"loss": 0.4297,
"step": 1500
},
{
"epoch": 0.87,
"learning_rate": 3.558246828143022e-05,
"loss": 0.3814,
"step": 2000
},
{
"epoch": 1.08,
"learning_rate": 3.1978085351787776e-05,
"loss": 0.3232,
"step": 2500
},
{
"epoch": 1.3,
"learning_rate": 2.8373702422145332e-05,
"loss": 0.2746,
"step": 3000
},
{
"epoch": 1.51,
"learning_rate": 2.4769319492502884e-05,
"loss": 0.2602,
"step": 3500
},
{
"epoch": 1.73,
"learning_rate": 2.116493656286044e-05,
"loss": 0.239,
"step": 4000
},
{
"epoch": 1.95,
"learning_rate": 1.7560553633217993e-05,
"loss": 0.2268,
"step": 4500
},
{
"epoch": 2.16,
"learning_rate": 1.395617070357555e-05,
"loss": 0.1843,
"step": 5000
},
{
"epoch": 2.38,
"learning_rate": 1.0351787773933102e-05,
"loss": 0.1678,
"step": 5500
},
{
"epoch": 2.6,
"learning_rate": 6.747404844290659e-06,
"loss": 0.1586,
"step": 6000
},
{
"epoch": 2.81,
"learning_rate": 3.143021914648212e-06,
"loss": 0.1531,
"step": 6500
},
{
"epoch": 3.0,
"step": 6936,
"total_flos": 2.370754172808069e+17,
"train_loss": 0.3064197311489365,
"train_runtime": 5244.4956,
"train_samples_per_second": 338.499,
"train_steps_per_second": 1.323
}
],
"logging_steps": 500,
"max_steps": 6936,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2.370754172808069e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
|