File size: 3,941 Bytes
577150f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
{
"best_metric": 2.404644012451172,
"best_model_checkpoint": "/mnt/green-efs/bashar.alhafni/models/pythia_1b/prepend_tokens_10/checkpoint-1325",
"epoch": 9.98868351565447,
"global_step": 6620,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 5e-05,
"loss": 2.6227,
"step": 662
},
{
"epoch": 1.0,
"eval_accuracy": 0.15464476939026045,
"eval_loss": 2.4311635494232178,
"eval_runtime": 202.921,
"eval_samples_per_second": 51.774,
"eval_steps_per_second": 1.621,
"step": 662
},
{
"epoch": 2.0,
"learning_rate": 5e-05,
"loss": 2.1857,
"step": 1325
},
{
"epoch": 2.0,
"eval_accuracy": 0.156901358233316,
"eval_loss": 2.404644012451172,
"eval_runtime": 204.3521,
"eval_samples_per_second": 51.411,
"eval_steps_per_second": 1.61,
"step": 1325
},
{
"epoch": 3.0,
"learning_rate": 5e-05,
"loss": 1.8241,
"step": 1988
},
{
"epoch": 3.0,
"eval_accuracy": 0.15710633350323114,
"eval_loss": 2.468627691268921,
"eval_runtime": 205.5884,
"eval_samples_per_second": 51.102,
"eval_steps_per_second": 1.6,
"step": 1988
},
{
"epoch": 4.0,
"learning_rate": 5e-05,
"loss": 1.4508,
"step": 2651
},
{
"epoch": 4.0,
"eval_accuracy": 0.15584242788973726,
"eval_loss": 2.629725456237793,
"eval_runtime": 204.4664,
"eval_samples_per_second": 51.383,
"eval_steps_per_second": 1.609,
"step": 2651
},
{
"epoch": 5.0,
"learning_rate": 5e-05,
"loss": 1.0894,
"step": 3313
},
{
"epoch": 5.0,
"eval_accuracy": 0.1536770218721546,
"eval_loss": 2.8261826038360596,
"eval_runtime": 204.5216,
"eval_samples_per_second": 51.369,
"eval_steps_per_second": 1.609,
"step": 3313
},
{
"epoch": 6.0,
"learning_rate": 5e-05,
"loss": 0.7696,
"step": 3976
},
{
"epoch": 6.0,
"eval_accuracy": 0.15195952822378275,
"eval_loss": 3.0950815677642822,
"eval_runtime": 202.8864,
"eval_samples_per_second": 51.783,
"eval_steps_per_second": 1.622,
"step": 3976
},
{
"epoch": 7.0,
"learning_rate": 5e-05,
"loss": 0.5194,
"step": 4639
},
{
"epoch": 7.0,
"eval_accuracy": 0.1502570146110243,
"eval_loss": 3.3549509048461914,
"eval_runtime": 202.9616,
"eval_samples_per_second": 51.763,
"eval_steps_per_second": 1.621,
"step": 4639
},
{
"epoch": 8.0,
"learning_rate": 5e-05,
"loss": 0.347,
"step": 5302
},
{
"epoch": 8.0,
"eval_accuracy": 0.14944492920211863,
"eval_loss": 3.607959032058716,
"eval_runtime": 203.1016,
"eval_samples_per_second": 51.728,
"eval_steps_per_second": 1.62,
"step": 5302
},
{
"epoch": 9.0,
"learning_rate": 5e-05,
"loss": 0.2465,
"step": 5964
},
{
"epoch": 9.0,
"eval_accuracy": 0.14904856304240988,
"eval_loss": 3.8273768424987793,
"eval_runtime": 203.0149,
"eval_samples_per_second": 51.75,
"eval_steps_per_second": 1.621,
"step": 5964
},
{
"epoch": 9.99,
"learning_rate": 5e-05,
"loss": 0.1935,
"step": 6620
},
{
"epoch": 9.99,
"eval_accuracy": 0.14827322989479177,
"eval_loss": 3.979735851287842,
"eval_runtime": 202.6913,
"eval_samples_per_second": 51.833,
"eval_steps_per_second": 1.623,
"step": 6620
}
],
"max_steps": 6620,
"num_train_epochs": 10,
"total_flos": 4.681188416161841e+18,
"trial_name": null,
"trial_params": null
}
|