mjphayes committed on
Commit 52b3af7
1 Parent(s): 380c933

mjphayes/vit-elpv-augmented

README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.7721
- - Accuracy: 0.6929
+ - Loss: 0.7687
+ - Accuracy: 0.7259
 
 ## Model description
 
@@ -46,27 +46,31 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
- - num_epochs: 10
+ - num_epochs: 14
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
- | No log | 1.0 | 69 | 1.0376 | 0.5939 |
- | No log | 2.0 | 138 | 0.9578 | 0.5914 |
- | No log | 3.0 | 207 | 0.9083 | 0.6447 |
- | No log | 4.0 | 276 | 0.8320 | 0.6751 |
- | No log | 5.0 | 345 | 0.8832 | 0.6371 |
- | No log | 6.0 | 414 | 0.8498 | 0.6574 |
- | No log | 7.0 | 483 | 0.8124 | 0.6701 |
- | 0.7809 | 8.0 | 552 | 0.8627 | 0.6701 |
- | 0.7809 | 9.0 | 621 | 0.8003 | 0.6701 |
- | 0.7809 | 10.0 | 690 | 0.7721 | 0.6929 |
+ | No log | 1.0 | 69 | 1.1146 | 0.5787 |
+ | No log | 2.0 | 138 | 0.9812 | 0.5787 |
+ | No log | 3.0 | 207 | 0.8885 | 0.6472 |
+ | No log | 4.0 | 276 | 0.7930 | 0.7081 |
+ | No log | 5.0 | 345 | 0.8019 | 0.6929 |
+ | No log | 6.0 | 414 | 0.8009 | 0.6878 |
+ | No log | 7.0 | 483 | 0.7984 | 0.6853 |
+ | 0.8194 | 8.0 | 552 | 0.7714 | 0.7107 |
+ | 0.8194 | 9.0 | 621 | 0.7667 | 0.7081 |
+ | 0.8194 | 10.0 | 690 | 0.7303 | 0.7234 |
+ | 0.8194 | 11.0 | 759 | 0.7321 | 0.7284 |
+ | 0.8194 | 12.0 | 828 | 0.7373 | 0.7335 |
+ | 0.8194 | 13.0 | 897 | 0.8051 | 0.6904 |
+ | 0.8194 | 14.0 | 966 | 0.7687 | 0.7259 |
 
 
 ### Framework versions
 
- - Transformers 4.35.0
- - Pytorch 2.1.0
- - Datasets 2.14.6
- - Tokenizers 0.14.1
+ - Transformers 4.35.2
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.15.0
+ - Tokenizers 0.15.0
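
The card above describes `mjphayes/vit-elpv-augmented`, fine-tuned from `google/vit-base-patch16-224-in21k`. A minimal inference sketch using the `transformers` image-classification pipeline, assuming the repo id from the commit header and a hypothetical local image file:

```python
# Minimal inference sketch (hedged): the repo id comes from the commit header,
# "cell.png" is a hypothetical local image (the repo name suggests the ELPV solar-cell dataset).
from PIL import Image
from transformers import pipeline

classifier = pipeline(
    task="image-classification",
    model="mjphayes/vit-elpv-augmented",  # fine-tuned from google/vit-base-patch16-224-in21k
)

image = Image.open("cell.png")
for prediction in classifier(image):
    print(f"{prediction['label']}: {prediction['score']:.4f}")
```

The predicted labels come from whatever `id2label` mapping the checkpoint's `config.json` defines.
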
all_results.json CHANGED
@@ -1,7 +1,8 @@
 {
- "epoch": 10.0,
- "train_loss": 0.7178522997889025,
- "train_runtime": 2833.88,
- "train_samples_per_second": 6.514,
- "train_steps_per_second": 0.102
+ "epoch": 14.0,
+ "total_flos": 4.790950643943604e+18,
+ "train_loss": 0.63200661163646,
+ "train_runtime": 1457.5958,
+ "train_samples_per_second": 42.415,
+ "train_steps_per_second": 0.663
 }
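
The new `all_results.json` values are internally consistent with the 966-step, 14-epoch run recorded in `trainer_state.json`. A quick back-of-the-envelope check; the per-epoch sample count and effective batch size derived here are inferred, not stated anywhere in the commit:

```python
# Consistency check of the reported training metrics (derived arithmetic only).
train_runtime = 1457.5958        # seconds, from all_results.json
steps_per_second = 0.663         # from all_results.json
samples_per_second = 42.415      # from all_results.json
num_epochs = 14
total_steps = 966                # global_step in trainer_state.json

print(round(steps_per_second * train_runtime))        # ~966, matches global_step
samples_seen = samples_per_second * train_runtime      # ~61,824 samples processed in total
print(round(samples_seen / num_epochs))                # ~4,416 training samples per epoch (inferred)
print(round(samples_seen / total_steps))               # ~64, implied effective batch size (inferred)
```
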
config.json CHANGED
@@ -32,5 +32,5 @@
 "problem_type": "single_label_classification",
 "qkv_bias": true,
 "torch_dtype": "float32",
- "transformers_version": "4.35.0"
+ "transformers_version": "4.35.2"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:b82d916b8466c58a9e22666cb75066ac16bfe1547114c247120c5dcf6d4a7f83
+ oid sha256:d29546920076aeb62358d5f3b4a15298c89fc5581b645c9cb4015ab1832aeedd
 size 343230128
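
`model.safetensors` is tracked with Git LFS, so the diff only touches the pointer file: `oid` is the SHA-256 of the new weight blob and `size` its length in bytes. A small verification sketch, assuming a locally downloaded copy at a hypothetical path:

```python
# Verify a downloaded model.safetensors against the LFS pointer above.
# The local path is hypothetical; the expected digest is copied from the diff.
import hashlib

EXPECTED_OID = "d29546920076aeb62358d5f3b4a15298c89fc5581b645c9cb4015ab1832aeedd"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

print(sha256_of("model.safetensors") == EXPECTED_OID)
```
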
runs/Nov16_09-36-14_nk7ic4m731/events.out.tfevents.1700127380.nk7ic4m731.228.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:328f29abe32d555dc51c43e7580c4157a946ef8373f06e5e3f38e381640c3278
+ size 9262
train_results.json CHANGED
@@ -1,7 +1,8 @@
 {
- "epoch": 10.0,
- "train_loss": 0.7178522997889025,
- "train_runtime": 2833.88,
- "train_samples_per_second": 6.514,
- "train_steps_per_second": 0.102
+ "epoch": 14.0,
+ "total_flos": 4.790950643943604e+18,
+ "train_loss": 0.63200661163646,
+ "train_runtime": 1457.5958,
+ "train_samples_per_second": 42.415,
+ "train_steps_per_second": 0.663
 }
trainer_state.json CHANGED
@@ -1,292 +1,160 @@
 {
- "best_metric": 0.7518796992481203,
- "best_model_checkpoint": "../results/elpv-vit/checkpoint-232",
- "epoch": 10.0,
+ "best_metric": 0.733502538071066,
+ "best_model_checkpoint": "../results/elpv-vit/checkpoint-828",
+ "epoch": 14.0,
 "eval_steps": 500,
- "global_step": 290,
+ "global_step": 966,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
- {
- "epoch": 0.34,
- "learning_rate": 1.7241379310344828e-05,
- "loss": 1.3357,
- "step": 10
- },
- {
- "epoch": 0.69,
- "learning_rate": 3.4482758620689657e-05,
- "loss": 1.1598,
- "step": 20
- },
 {
 "epoch": 1.0,
- "eval_accuracy": 0.6954887218045113,
- "eval_loss": 0.9091870188713074,
- "eval_runtime": 10.3022,
- "eval_samples_per_second": 25.82,
- "eval_steps_per_second": 1.65,
- "step": 29
- },
- {
- "epoch": 1.03,
- "learning_rate": 4.980842911877395e-05,
- "loss": 0.9746,
- "step": 30
- },
- {
- "epoch": 1.38,
- "learning_rate": 4.789272030651341e-05,
- "loss": 0.8597,
- "step": 40
- },
- {
- "epoch": 1.72,
- "learning_rate": 4.597701149425287e-05,
- "loss": 0.8226,
- "step": 50
+ "eval_accuracy": 0.5786802030456852,
+ "eval_loss": 1.1146228313446045,
+ "eval_runtime": 15.1595,
+ "eval_samples_per_second": 25.99,
+ "eval_steps_per_second": 1.649,
+ "step": 69
 },
 {
 "epoch": 2.0,
- "eval_accuracy": 0.7368421052631579,
- "eval_loss": 0.7852613925933838,
- "eval_runtime": 10.1106,
- "eval_samples_per_second": 26.309,
- "eval_steps_per_second": 1.681,
- "step": 58
- },
- {
- "epoch": 2.07,
- "learning_rate": 4.406130268199234e-05,
- "loss": 0.7585,
- "step": 60
- },
- {
- "epoch": 2.41,
- "learning_rate": 4.21455938697318e-05,
- "loss": 0.7611,
- "step": 70
- },
- {
- "epoch": 2.76,
- "learning_rate": 4.0229885057471265e-05,
- "loss": 0.6841,
- "step": 80
+ "eval_accuracy": 0.5786802030456852,
+ "eval_loss": 0.9812496304512024,
+ "eval_runtime": 4.4653,
+ "eval_samples_per_second": 88.237,
+ "eval_steps_per_second": 5.599,
+ "step": 138
 },
 {
 "epoch": 3.0,
- "eval_accuracy": 0.7180451127819549,
- "eval_loss": 0.811350405216217,
- "eval_runtime": 13.3447,
- "eval_samples_per_second": 19.933,
- "eval_steps_per_second": 1.274,
- "step": 87
- },
- {
- "epoch": 3.1,
- "learning_rate": 3.831417624521073e-05,
- "loss": 0.7207,
- "step": 90
- },
- {
- "epoch": 3.45,
- "learning_rate": 3.6398467432950195e-05,
- "loss": 0.7422,
- "step": 100
- },
- {
- "epoch": 3.79,
- "learning_rate": 3.4482758620689657e-05,
- "loss": 0.73,
- "step": 110
+ "eval_accuracy": 0.6472081218274112,
+ "eval_loss": 0.8884502053260803,
+ "eval_runtime": 2.1388,
+ "eval_samples_per_second": 184.219,
+ "eval_steps_per_second": 11.689,
+ "step": 207
 },
 {
 "epoch": 4.0,
- "eval_accuracy": 0.7293233082706767,
- "eval_loss": 0.7547905445098877,
- "eval_runtime": 11.7612,
- "eval_samples_per_second": 22.617,
- "eval_steps_per_second": 1.445,
- "step": 116
- },
- {
- "epoch": 4.14,
- "learning_rate": 3.256704980842912e-05,
- "loss": 0.6787,
- "step": 120
- },
- {
- "epoch": 4.48,
- "learning_rate": 3.065134099616858e-05,
- "loss": 0.6738,
- "step": 130
- },
- {
- "epoch": 4.83,
- "learning_rate": 2.8735632183908045e-05,
- "loss": 0.6725,
- "step": 140
+ "eval_accuracy": 0.7081218274111675,
+ "eval_loss": 0.7930060625076294,
+ "eval_runtime": 2.1153,
+ "eval_samples_per_second": 186.265,
+ "eval_steps_per_second": 11.819,
+ "step": 276
 },
 {
 "epoch": 5.0,
- "eval_accuracy": 0.7406015037593985,
- "eval_loss": 0.75775146484375,
- "eval_runtime": 12.9654,
- "eval_samples_per_second": 20.516,
- "eval_steps_per_second": 1.311,
- "step": 145
- },
- {
- "epoch": 5.17,
- "learning_rate": 2.681992337164751e-05,
- "loss": 0.6785,
- "step": 150
- },
- {
- "epoch": 5.52,
- "learning_rate": 2.4904214559386975e-05,
- "loss": 0.6259,
- "step": 160
- },
- {
- "epoch": 5.86,
- "learning_rate": 2.2988505747126437e-05,
- "loss": 0.6625,
- "step": 170
+ "eval_accuracy": 0.6928934010152284,
+ "eval_loss": 0.8019062280654907,
+ "eval_runtime": 7.7818,
+ "eval_samples_per_second": 50.631,
+ "eval_steps_per_second": 3.213,
+ "step": 345
 },
 {
 "epoch": 6.0,
- "eval_accuracy": 0.7368421052631579,
- "eval_loss": 0.732495129108429,
- "eval_runtime": 14.6428,
- "eval_samples_per_second": 18.166,
- "eval_steps_per_second": 1.161,
- "step": 174
- },
- {
- "epoch": 6.21,
- "learning_rate": 2.10727969348659e-05,
- "loss": 0.6335,
- "step": 180
- },
- {
- "epoch": 6.55,
- "learning_rate": 1.9157088122605367e-05,
- "loss": 0.6216,
- "step": 190
- },
- {
- "epoch": 6.9,
- "learning_rate": 1.7241379310344828e-05,
- "loss": 0.6214,
- "step": 200
+ "eval_accuracy": 0.6878172588832487,
+ "eval_loss": 0.8009192943572998,
+ "eval_runtime": 2.0611,
+ "eval_samples_per_second": 191.162,
+ "eval_steps_per_second": 12.13,
+ "step": 414
 },
 {
 "epoch": 7.0,
- "eval_accuracy": 0.7481203007518797,
- "eval_loss": 0.6711514592170715,
- "eval_runtime": 13.3792,
- "eval_samples_per_second": 19.882,
- "eval_steps_per_second": 1.271,
- "step": 203
- },
- {
- "epoch": 7.24,
- "learning_rate": 1.532567049808429e-05,
- "loss": 0.6385,
- "step": 210
+ "eval_accuracy": 0.6852791878172588,
+ "eval_loss": 0.7984281182289124,
+ "eval_runtime": 12.8592,
+ "eval_samples_per_second": 30.639,
+ "eval_steps_per_second": 1.944,
+ "step": 483
 },
 {
- "epoch": 7.59,
- "learning_rate": 1.3409961685823755e-05,
- "loss": 0.6202,
- "step": 220
- },
- {
- "epoch": 7.93,
- "learning_rate": 1.1494252873563218e-05,
- "loss": 0.5911,
- "step": 230
+ "epoch": 7.25,
+ "learning_rate": 2.6812428078250863e-05,
+ "loss": 0.8194,
+ "step": 500
 },
 {
 "epoch": 8.0,
- "eval_accuracy": 0.7518796992481203,
- "eval_loss": 0.6812042593955994,
- "eval_runtime": 14.3277,
- "eval_samples_per_second": 18.565,
- "eval_steps_per_second": 1.187,
- "step": 232
- },
- {
- "epoch": 8.28,
- "learning_rate": 9.578544061302683e-06,
- "loss": 0.5925,
- "step": 240
- },
- {
- "epoch": 8.62,
- "learning_rate": 7.662835249042145e-06,
- "loss": 0.5711,
- "step": 250
- },
- {
- "epoch": 8.97,
- "learning_rate": 5.747126436781609e-06,
- "loss": 0.6169,
- "step": 260
+ "eval_accuracy": 0.7106598984771574,
+ "eval_loss": 0.7714311480522156,
+ "eval_runtime": 2.123,
+ "eval_samples_per_second": 185.586,
+ "eval_steps_per_second": 11.776,
+ "step": 552
 },
 {
 "epoch": 9.0,
- "eval_accuracy": 0.7481203007518797,
- "eval_loss": 0.7047642469406128,
- "eval_runtime": 17.1552,
- "eval_samples_per_second": 15.505,
- "eval_steps_per_second": 0.991,
- "step": 261
- },
- {
- "epoch": 9.31,
- "learning_rate": 3.8314176245210725e-06,
- "loss": 0.6163,
- "step": 270
- },
- {
- "epoch": 9.66,
- "learning_rate": 1.9157088122605362e-06,
- "loss": 0.5672,
- "step": 280
- },
- {
- "epoch": 10.0,
- "learning_rate": 0.0,
- "loss": 0.5863,
- "step": 290
- },
- {
- "epoch": 10.0,
- "eval_accuracy": 0.7406015037593985,
- "eval_loss": 0.7278200387954712,
- "eval_runtime": 12.3926,
- "eval_samples_per_second": 21.464,
- "eval_steps_per_second": 1.372,
- "step": 290
+ "eval_accuracy": 0.7081218274111675,
+ "eval_loss": 0.7666618824005127,
+ "eval_runtime": 2.021,
+ "eval_samples_per_second": 194.952,
+ "eval_steps_per_second": 12.37,
+ "step": 621
 },
 {
 "epoch": 10.0,
- "step": 290,
- "total_flos": 1.430527770561577e+18,
- "train_loss": 0.7178522997889025,
- "train_runtime": 2833.88,
- "train_samples_per_second": 6.514,
- "train_steps_per_second": 0.102
+ "eval_accuracy": 0.7233502538071066,
+ "eval_loss": 0.7302789688110352,
+ "eval_runtime": 14.4927,
+ "eval_samples_per_second": 27.186,
+ "eval_steps_per_second": 1.725,
+ "step": 690
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.7284263959390863,
+ "eval_loss": 0.7321494817733765,
+ "eval_runtime": 2.2455,
+ "eval_samples_per_second": 175.465,
+ "eval_steps_per_second": 11.134,
+ "step": 759
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.733502538071066,
+ "eval_loss": 0.7373437285423279,
+ "eval_runtime": 5.5421,
+ "eval_samples_per_second": 71.092,
+ "eval_steps_per_second": 4.511,
+ "step": 828
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.6903553299492385,
+ "eval_loss": 0.8051439523696899,
+ "eval_runtime": 12.7177,
+ "eval_samples_per_second": 30.98,
+ "eval_steps_per_second": 1.966,
+ "step": 897
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.7258883248730964,
+ "eval_loss": 0.7687421441078186,
+ "eval_runtime": 4.2192,
+ "eval_samples_per_second": 93.382,
+ "eval_steps_per_second": 5.925,
+ "step": 966
+ },
+ {
+ "epoch": 14.0,
+ "step": 966,
+ "total_flos": 4.790950643943604e+18,
+ "train_loss": 0.63200661163646,
+ "train_runtime": 1457.5958,
+ "train_samples_per_second": 42.415,
+ "train_steps_per_second": 0.663
 }
 ],
- "logging_steps": 10,
- "max_steps": 290,
- "num_train_epochs": 10,
+ "logging_steps": 500,
+ "max_steps": 966,
+ "num_train_epochs": 14,
 "save_steps": 500,
- "total_flos": 1.430527770561577e+18,
+ "total_flos": 4.790950643943604e+18,
 "trial_name": null,
 "trial_params": null
 }
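
Read together with the README hunk, the updated `trainer_state.json` implies per-epoch evaluation, `logging_steps=500`, and a 14-epoch linear schedule with 10% warmup over 966 steps. A `TrainingArguments` sketch consistent with those values; every parameter marked "inferred" or "assumption" in the comments is reconstructed, not stated in the commit, and the model/dataset setup is omitted:

```python
# Hedged reconstruction of the training configuration behind this trainer_state.json.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="../results/elpv-vit",   # matches the best_model_checkpoint prefix
    num_train_epochs=14,                # "num_epochs: 14" in the README
    per_device_train_batch_size=64,     # inferred: 966 steps over 14 epochs (~69 steps/epoch)
    learning_rate=5e-5,                 # inferred from the learning rate logged at step 500
    warmup_ratio=0.1,                   # from the README
    lr_scheduler_type="linear",         # from the README
    evaluation_strategy="epoch",        # eval entries appear once per epoch in log_history
    save_strategy="epoch",              # inferred: best checkpoint is checkpoint-828 (end of epoch 12)
    load_best_model_at_end=True,        # inferred from best_metric / best_model_checkpoint
    metric_for_best_model="accuracy",   # inferred: best_metric equals the epoch-12 eval accuracy
    logging_steps=500,                  # "logging_steps": 500 in trainer_state.json
)
```
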
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:321bf62f0bb828ae9b9a88c70aa7a7f83bb62da583e16d3a70442dd7fa6e30ef
- size 4536
+ oid sha256:ec40bcb32560657f17298036bfeeb617bf1339373552d659c9a5924eb30b7635
+ size 4600