File size: 2,300 Bytes
efe839e 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 62d65b0 8ff8ef0 efe839e 8ff8ef0 62d65b0 8ff8ef0 efe839e 8ff8ef0 efe839e 62d65b0 efe839e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6936,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 4.639561707035756e-05,
"loss": 0.8543,
"step": 500
},
{
"epoch": 0.43,
"learning_rate": 4.2791234140715114e-05,
"loss": 0.512,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 3.9186851211072664e-05,
"loss": 0.4291,
"step": 1500
},
{
"epoch": 0.87,
"learning_rate": 3.558246828143022e-05,
"loss": 0.3857,
"step": 2000
},
{
"epoch": 1.08,
"learning_rate": 3.1978085351787776e-05,
"loss": 0.3264,
"step": 2500
},
{
"epoch": 1.3,
"learning_rate": 2.8373702422145332e-05,
"loss": 0.2763,
"step": 3000
},
{
"epoch": 1.51,
"learning_rate": 2.4769319492502884e-05,
"loss": 0.2598,
"step": 3500
},
{
"epoch": 1.73,
"learning_rate": 2.116493656286044e-05,
"loss": 0.2409,
"step": 4000
},
{
"epoch": 1.95,
"learning_rate": 1.7560553633217993e-05,
"loss": 0.2283,
"step": 4500
},
{
"epoch": 2.16,
"learning_rate": 1.395617070357555e-05,
"loss": 0.185,
"step": 5000
},
{
"epoch": 2.38,
"learning_rate": 1.0351787773933102e-05,
"loss": 0.1686,
"step": 5500
},
{
"epoch": 2.6,
"learning_rate": 6.747404844290659e-06,
"loss": 0.1595,
"step": 6000
},
{
"epoch": 2.81,
"learning_rate": 3.143021914648212e-06,
"loss": 0.1537,
"step": 6500
},
{
"epoch": 3.0,
"step": 6936,
"total_flos": 2.370754172808069e+17,
"train_loss": 0.3107225017701603,
"train_runtime": 5244.1454,
"train_samples_per_second": 338.522,
"train_steps_per_second": 1.323
}
],
"logging_steps": 500,
"max_steps": 6936,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 2.370754172808069e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
|