vintage-lavender619 commited on
Commit
dcca97a
1 Parent(s): 0345df0

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.896875
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-large-patch16-224](https://huggingface.co/google/vit-large-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.3321
36
- - Accuracy: 0.8969
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.909375
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [google/vit-large-patch16-224](https://huggingface.co/google/vit-large-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.3101
36
+ - Accuracy: 0.9094
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.842105263157895,
3
- "eval_accuracy": 0.9564068692206077,
4
- "eval_loss": 0.145552858710289,
5
- "eval_runtime": 13.7179,
6
- "eval_samples_per_second": 55.183,
7
- "eval_steps_per_second": 1.75,
8
- "total_flos": 4.0192166172247327e+18,
9
- "train_loss": 0.28673774859179624,
10
- "train_runtime": 831.1321,
11
- "train_samples_per_second": 18.216,
12
- "train_steps_per_second": 0.138
13
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.909375,
4
+ "eval_loss": 0.3100855350494385,
5
+ "eval_runtime": 6.5793,
6
+ "eval_samples_per_second": 48.637,
7
+ "eval_steps_per_second": 1.52,
8
+ "total_flos": 1.0519143604184678e+19,
9
+ "train_loss": 0.31787962436676026,
10
+ "train_runtime": 2532.1955,
11
+ "train_samples_per_second": 15.165,
12
+ "train_steps_per_second": 0.118
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.842105263157895,
3
- "eval_accuracy": 0.9564068692206077,
4
- "eval_loss": 0.145552858710289,
5
- "eval_runtime": 13.7179,
6
- "eval_samples_per_second": 55.183,
7
- "eval_steps_per_second": 1.75
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.909375,
4
+ "eval_loss": 0.3100855350494385,
5
+ "eval_runtime": 6.5793,
6
+ "eval_samples_per_second": 48.637,
7
+ "eval_steps_per_second": 1.52
8
  }
runs/Jun10_10-42-57_4c61f7eac1f1/events.out.tfevents.1718018770.4c61f7eac1f1.793.12 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6abc79accf7e7043b49971c1bd7cf46714717081403ae3511762de69df8c74ee
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.842105263157895,
3
- "total_flos": 4.0192166172247327e+18,
4
- "train_loss": 0.28673774859179624,
5
- "train_runtime": 831.1321,
6
- "train_samples_per_second": 18.216,
7
- "train_steps_per_second": 0.138
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "total_flos": 1.0519143604184678e+19,
4
+ "train_loss": 0.31787962436676026,
5
+ "train_runtime": 2532.1955,
6
+ "train_samples_per_second": 15.165,
7
+ "train_steps_per_second": 0.118
8
  }
trainer_state.json CHANGED
@@ -1,149 +1,507 @@
1
  {
2
- "best_metric": 0.9564068692206077,
3
- "best_model_checkpoint": "vit-large-patch16-224-finetuned-landscape-test/checkpoint-95",
4
- "epoch": 4.842105263157895,
5
  "eval_steps": 500,
6
- "global_step": 115,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.42105263157894735,
13
- "grad_norm": 2.8438663482666016,
14
- "learning_rate": 4.166666666666667e-05,
15
- "loss": 1.483,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.8421052631578947,
20
- "grad_norm": 1.8428654670715332,
21
- "learning_rate": 4.611650485436894e-05,
22
- "loss": 0.3395,
 
 
 
 
 
 
 
 
 
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.968421052631579,
27
- "eval_accuracy": 0.9379128137384413,
28
- "eval_loss": 0.18436767160892487,
29
- "eval_runtime": 13.7525,
30
- "eval_samples_per_second": 55.045,
31
- "eval_steps_per_second": 1.745,
32
- "step": 23
 
 
 
 
 
 
 
33
  },
34
  {
35
- "epoch": 1.263157894736842,
36
- "grad_norm": 2.5257349014282227,
37
- "learning_rate": 4.12621359223301e-05,
38
- "loss": 0.2331,
 
 
39
  "step": 30
40
  },
41
  {
42
- "epoch": 1.6842105263157894,
43
- "grad_norm": 2.2784433364868164,
44
- "learning_rate": 3.6407766990291265e-05,
45
- "loss": 0.2125,
 
 
 
 
 
 
 
 
 
46
  "step": 40
47
  },
48
  {
49
- "epoch": 1.9789473684210526,
50
- "eval_accuracy": 0.9365918097754293,
51
- "eval_loss": 0.16518810391426086,
52
- "eval_runtime": 13.7671,
53
- "eval_samples_per_second": 54.986,
54
- "eval_steps_per_second": 1.743,
55
- "step": 47
56
  },
57
  {
58
- "epoch": 2.1052631578947367,
59
- "grad_norm": 0.9819332957267761,
60
- "learning_rate": 3.155339805825243e-05,
61
- "loss": 0.2093,
 
 
62
  "step": 50
63
  },
64
  {
65
- "epoch": 2.526315789473684,
66
- "grad_norm": 2.5721793174743652,
67
- "learning_rate": 2.6699029126213593e-05,
68
- "loss": 0.1478,
69
  "step": 60
70
  },
71
  {
72
- "epoch": 2.9473684210526314,
73
- "grad_norm": 1.9163882732391357,
74
- "learning_rate": 2.1844660194174756e-05,
75
- "loss": 0.1725,
 
 
 
 
 
 
 
 
 
76
  "step": 70
77
  },
78
  {
79
- "epoch": 2.9894736842105263,
80
- "eval_accuracy": 0.9498018494055482,
81
- "eval_loss": 0.1384432017803192,
82
- "eval_runtime": 13.786,
83
- "eval_samples_per_second": 54.911,
84
- "eval_steps_per_second": 1.741,
85
- "step": 71
 
 
 
 
 
 
 
86
  },
87
  {
88
- "epoch": 3.3684210526315788,
89
- "grad_norm": 1.7644811868667603,
90
- "learning_rate": 1.6990291262135926e-05,
91
- "loss": 0.1155,
 
 
92
  "step": 80
93
  },
94
  {
95
- "epoch": 3.7894736842105265,
96
- "grad_norm": 1.712578296661377,
97
- "learning_rate": 1.2135922330097088e-05,
98
- "loss": 0.1371,
99
  "step": 90
100
  },
101
  {
102
- "epoch": 4.0,
103
- "eval_accuracy": 0.9564068692206077,
104
- "eval_loss": 0.145552858710289,
105
- "eval_runtime": 13.7797,
106
- "eval_samples_per_second": 54.936,
107
- "eval_steps_per_second": 1.742,
108
- "step": 95
109
- },
110
- {
111
- "epoch": 4.2105263157894735,
112
- "grad_norm": 1.6249809265136719,
113
- "learning_rate": 7.281553398058253e-06,
114
- "loss": 0.1134,
 
 
 
 
 
 
 
 
 
115
  "step": 100
116
  },
117
  {
118
- "epoch": 4.631578947368421,
119
- "grad_norm": 0.8358253240585327,
120
- "learning_rate": 2.4271844660194174e-06,
121
- "loss": 0.096,
 
 
 
 
 
 
 
 
 
122
  "step": 110
123
  },
124
  {
125
- "epoch": 4.842105263157895,
126
- "eval_accuracy": 0.952443857331572,
127
- "eval_loss": 0.14051873981952667,
128
- "eval_runtime": 13.8352,
129
- "eval_samples_per_second": 54.716,
130
- "eval_steps_per_second": 1.735,
131
- "step": 115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  },
133
  {
134
- "epoch": 4.842105263157895,
135
- "step": 115,
136
- "total_flos": 4.0192166172247327e+18,
137
- "train_loss": 0.28673774859179624,
138
- "train_runtime": 831.1321,
139
- "train_samples_per_second": 18.216,
140
- "train_steps_per_second": 0.138
141
  }
142
  ],
143
  "logging_steps": 10,
144
- "max_steps": 115,
145
  "num_input_tokens_seen": 0,
146
- "num_train_epochs": 5,
147
  "save_steps": 500,
148
  "stateful_callbacks": {
149
  "TrainerControl": {
@@ -157,7 +515,7 @@
157
  "attributes": {}
158
  }
159
  },
160
- "total_flos": 4.0192166172247327e+18,
161
  "train_batch_size": 32,
162
  "trial_name": null,
163
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.909375,
3
+ "best_model_checkpoint": "vit-large-patch16-224-finetuned-landscape-test/checkpoint-270",
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "grad_norm": 4.163173198699951,
14
+ "learning_rate": 1.6666666666666667e-05,
15
+ "loss": 1.3906,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.496875,
21
+ "eval_loss": 1.1521408557891846,
22
+ "eval_runtime": 6.5609,
23
+ "eval_samples_per_second": 48.774,
24
+ "eval_steps_per_second": 1.524,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "grad_norm": 5.007761478424072,
30
+ "learning_rate": 3.3333333333333335e-05,
31
+ "loss": 0.914,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.66875,
37
+ "eval_loss": 0.781201183795929,
38
+ "eval_runtime": 6.7058,
39
+ "eval_samples_per_second": 47.72,
40
+ "eval_steps_per_second": 1.491,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "grad_norm": 6.4083170890808105,
46
+ "learning_rate": 5e-05,
47
+ "loss": 0.6704,
48
+ "step": 30
49
  },
50
  {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.76875,
53
+ "eval_loss": 0.556643009185791,
54
+ "eval_runtime": 6.6958,
55
+ "eval_samples_per_second": 47.791,
56
+ "eval_steps_per_second": 1.493,
57
  "step": 30
58
  },
59
  {
60
+ "epoch": 4.0,
61
+ "grad_norm": 2.2590925693511963,
62
+ "learning_rate": 4.814814814814815e-05,
63
+ "loss": 0.4609,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_accuracy": 0.83125,
69
+ "eval_loss": 0.4362529218196869,
70
+ "eval_runtime": 6.6327,
71
+ "eval_samples_per_second": 48.245,
72
+ "eval_steps_per_second": 1.508,
73
  "step": 40
74
  },
75
  {
76
+ "epoch": 5.0,
77
+ "grad_norm": 2.843147039413452,
78
+ "learning_rate": 4.62962962962963e-05,
79
+ "loss": 0.404,
80
+ "step": 50
 
 
81
  },
82
  {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.815625,
85
+ "eval_loss": 0.4806601405143738,
86
+ "eval_runtime": 6.7484,
87
+ "eval_samples_per_second": 47.418,
88
+ "eval_steps_per_second": 1.482,
89
  "step": 50
90
  },
91
  {
92
+ "epoch": 6.0,
93
+ "grad_norm": 3.1339221000671387,
94
+ "learning_rate": 4.4444444444444447e-05,
95
+ "loss": 0.3948,
96
  "step": 60
97
  },
98
  {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.853125,
101
+ "eval_loss": 0.4215615689754486,
102
+ "eval_runtime": 6.7585,
103
+ "eval_samples_per_second": 47.348,
104
+ "eval_steps_per_second": 1.48,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "grad_norm": 1.822701096534729,
110
+ "learning_rate": 4.259259259259259e-05,
111
+ "loss": 0.3535,
112
  "step": 70
113
  },
114
  {
115
+ "epoch": 7.0,
116
+ "eval_accuracy": 0.86875,
117
+ "eval_loss": 0.32805871963500977,
118
+ "eval_runtime": 6.7133,
119
+ "eval_samples_per_second": 47.666,
120
+ "eval_steps_per_second": 1.49,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "grad_norm": 2.6920511722564697,
126
+ "learning_rate": 4.074074074074074e-05,
127
+ "loss": 0.3107,
128
+ "step": 80
129
  },
130
  {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.9,
133
+ "eval_loss": 0.2971627116203308,
134
+ "eval_runtime": 6.6598,
135
+ "eval_samples_per_second": 48.049,
136
+ "eval_steps_per_second": 1.502,
137
  "step": 80
138
  },
139
  {
140
+ "epoch": 9.0,
141
+ "grad_norm": 3.074315309524536,
142
+ "learning_rate": 3.888888888888889e-05,
143
+ "loss": 0.3086,
144
  "step": 90
145
  },
146
  {
147
+ "epoch": 9.0,
148
+ "eval_accuracy": 0.88125,
149
+ "eval_loss": 0.332777202129364,
150
+ "eval_runtime": 6.6439,
151
+ "eval_samples_per_second": 48.165,
152
+ "eval_steps_per_second": 1.505,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 10.0,
157
+ "grad_norm": 1.8536041975021362,
158
+ "learning_rate": 3.7037037037037037e-05,
159
+ "loss": 0.2564,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "eval_accuracy": 0.8875,
165
+ "eval_loss": 0.35169774293899536,
166
+ "eval_runtime": 6.7081,
167
+ "eval_samples_per_second": 47.704,
168
+ "eval_steps_per_second": 1.491,
169
  "step": 100
170
  },
171
  {
172
+ "epoch": 11.0,
173
+ "grad_norm": 2.0643813610076904,
174
+ "learning_rate": 3.518518518518519e-05,
175
+ "loss": 0.2654,
176
+ "step": 110
177
+ },
178
+ {
179
+ "epoch": 11.0,
180
+ "eval_accuracy": 0.859375,
181
+ "eval_loss": 0.39852675795555115,
182
+ "eval_runtime": 6.6638,
183
+ "eval_samples_per_second": 48.021,
184
+ "eval_steps_per_second": 1.501,
185
  "step": 110
186
  },
187
  {
188
+ "epoch": 12.0,
189
+ "grad_norm": 2.567340135574341,
190
+ "learning_rate": 3.3333333333333335e-05,
191
+ "loss": 0.2733,
192
+ "step": 120
193
+ },
194
+ {
195
+ "epoch": 12.0,
196
+ "eval_accuracy": 0.90625,
197
+ "eval_loss": 0.28696078062057495,
198
+ "eval_runtime": 6.66,
199
+ "eval_samples_per_second": 48.048,
200
+ "eval_steps_per_second": 1.501,
201
+ "step": 120
202
+ },
203
+ {
204
+ "epoch": 13.0,
205
+ "grad_norm": 2.107881546020508,
206
+ "learning_rate": 3.148148148148148e-05,
207
+ "loss": 0.2511,
208
+ "step": 130
209
+ },
210
+ {
211
+ "epoch": 13.0,
212
+ "eval_accuracy": 0.8875,
213
+ "eval_loss": 0.41765227913856506,
214
+ "eval_runtime": 6.7346,
215
+ "eval_samples_per_second": 47.516,
216
+ "eval_steps_per_second": 1.485,
217
+ "step": 130
218
+ },
219
+ {
220
+ "epoch": 14.0,
221
+ "grad_norm": 2.686899423599243,
222
+ "learning_rate": 2.962962962962963e-05,
223
+ "loss": 0.2762,
224
+ "step": 140
225
+ },
226
+ {
227
+ "epoch": 14.0,
228
+ "eval_accuracy": 0.89375,
229
+ "eval_loss": 0.35794347524642944,
230
+ "eval_runtime": 6.6606,
231
+ "eval_samples_per_second": 48.044,
232
+ "eval_steps_per_second": 1.501,
233
+ "step": 140
234
+ },
235
+ {
236
+ "epoch": 15.0,
237
+ "grad_norm": 1.5542501211166382,
238
+ "learning_rate": 2.777777777777778e-05,
239
+ "loss": 0.2188,
240
+ "step": 150
241
+ },
242
+ {
243
+ "epoch": 15.0,
244
+ "eval_accuracy": 0.890625,
245
+ "eval_loss": 0.33477360010147095,
246
+ "eval_runtime": 6.6577,
247
+ "eval_samples_per_second": 48.064,
248
+ "eval_steps_per_second": 1.502,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 16.0,
253
+ "grad_norm": 1.8425265550613403,
254
+ "learning_rate": 2.5925925925925925e-05,
255
+ "loss": 0.2265,
256
+ "step": 160
257
+ },
258
+ {
259
+ "epoch": 16.0,
260
+ "eval_accuracy": 0.903125,
261
+ "eval_loss": 0.3045758605003357,
262
+ "eval_runtime": 6.6513,
263
+ "eval_samples_per_second": 48.111,
264
+ "eval_steps_per_second": 1.503,
265
+ "step": 160
266
+ },
267
+ {
268
+ "epoch": 17.0,
269
+ "grad_norm": 1.9955267906188965,
270
+ "learning_rate": 2.4074074074074074e-05,
271
+ "loss": 0.2054,
272
+ "step": 170
273
+ },
274
+ {
275
+ "epoch": 17.0,
276
+ "eval_accuracy": 0.896875,
277
+ "eval_loss": 0.33053359389305115,
278
+ "eval_runtime": 6.639,
279
+ "eval_samples_per_second": 48.2,
280
+ "eval_steps_per_second": 1.506,
281
+ "step": 170
282
+ },
283
+ {
284
+ "epoch": 18.0,
285
+ "grad_norm": 2.128180980682373,
286
+ "learning_rate": 2.2222222222222223e-05,
287
+ "loss": 0.1951,
288
+ "step": 180
289
+ },
290
+ {
291
+ "epoch": 18.0,
292
+ "eval_accuracy": 0.88125,
293
+ "eval_loss": 0.3575904369354248,
294
+ "eval_runtime": 6.6576,
295
+ "eval_samples_per_second": 48.065,
296
+ "eval_steps_per_second": 1.502,
297
+ "step": 180
298
+ },
299
+ {
300
+ "epoch": 19.0,
301
+ "grad_norm": 1.225751280784607,
302
+ "learning_rate": 2.037037037037037e-05,
303
+ "loss": 0.1762,
304
+ "step": 190
305
+ },
306
+ {
307
+ "epoch": 19.0,
308
+ "eval_accuracy": 0.88125,
309
+ "eval_loss": 0.39847835898399353,
310
+ "eval_runtime": 6.6313,
311
+ "eval_samples_per_second": 48.256,
312
+ "eval_steps_per_second": 1.508,
313
+ "step": 190
314
+ },
315
+ {
316
+ "epoch": 20.0,
317
+ "grad_norm": 2.6766974925994873,
318
+ "learning_rate": 1.8518518518518518e-05,
319
+ "loss": 0.2264,
320
+ "step": 200
321
+ },
322
+ {
323
+ "epoch": 20.0,
324
+ "eval_accuracy": 0.903125,
325
+ "eval_loss": 0.3710865080356598,
326
+ "eval_runtime": 6.7413,
327
+ "eval_samples_per_second": 47.469,
328
+ "eval_steps_per_second": 1.483,
329
+ "step": 200
330
+ },
331
+ {
332
+ "epoch": 21.0,
333
+ "grad_norm": 1.814340591430664,
334
+ "learning_rate": 1.6666666666666667e-05,
335
+ "loss": 0.1958,
336
+ "step": 210
337
+ },
338
+ {
339
+ "epoch": 21.0,
340
+ "eval_accuracy": 0.8875,
341
+ "eval_loss": 0.32590168714523315,
342
+ "eval_runtime": 6.6661,
343
+ "eval_samples_per_second": 48.004,
344
+ "eval_steps_per_second": 1.5,
345
+ "step": 210
346
+ },
347
+ {
348
+ "epoch": 22.0,
349
+ "grad_norm": 1.8976715803146362,
350
+ "learning_rate": 1.4814814814814815e-05,
351
+ "loss": 0.1765,
352
+ "step": 220
353
+ },
354
+ {
355
+ "epoch": 22.0,
356
+ "eval_accuracy": 0.89375,
357
+ "eval_loss": 0.380399614572525,
358
+ "eval_runtime": 6.7489,
359
+ "eval_samples_per_second": 47.415,
360
+ "eval_steps_per_second": 1.482,
361
+ "step": 220
362
+ },
363
+ {
364
+ "epoch": 23.0,
365
+ "grad_norm": 2.2366738319396973,
366
+ "learning_rate": 1.2962962962962962e-05,
367
+ "loss": 0.1859,
368
+ "step": 230
369
+ },
370
+ {
371
+ "epoch": 23.0,
372
+ "eval_accuracy": 0.9,
373
+ "eval_loss": 0.34643369913101196,
374
+ "eval_runtime": 6.6848,
375
+ "eval_samples_per_second": 47.87,
376
+ "eval_steps_per_second": 1.496,
377
+ "step": 230
378
+ },
379
+ {
380
+ "epoch": 24.0,
381
+ "grad_norm": 1.6862928867340088,
382
+ "learning_rate": 1.1111111111111112e-05,
383
+ "loss": 0.1915,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 24.0,
388
+ "eval_accuracy": 0.890625,
389
+ "eval_loss": 0.3741697371006012,
390
+ "eval_runtime": 6.6221,
391
+ "eval_samples_per_second": 48.323,
392
+ "eval_steps_per_second": 1.51,
393
+ "step": 240
394
+ },
395
+ {
396
+ "epoch": 25.0,
397
+ "grad_norm": 1.770437240600586,
398
+ "learning_rate": 9.259259259259259e-06,
399
+ "loss": 0.1667,
400
+ "step": 250
401
+ },
402
+ {
403
+ "epoch": 25.0,
404
+ "eval_accuracy": 0.90625,
405
+ "eval_loss": 0.3199695944786072,
406
+ "eval_runtime": 6.6194,
407
+ "eval_samples_per_second": 48.343,
408
+ "eval_steps_per_second": 1.511,
409
+ "step": 250
410
+ },
411
+ {
412
+ "epoch": 26.0,
413
+ "grad_norm": 1.6922975778579712,
414
+ "learning_rate": 7.4074074074074075e-06,
415
+ "loss": 0.1744,
416
+ "step": 260
417
+ },
418
+ {
419
+ "epoch": 26.0,
420
+ "eval_accuracy": 0.89375,
421
+ "eval_loss": 0.3544948101043701,
422
+ "eval_runtime": 6.6657,
423
+ "eval_samples_per_second": 48.007,
424
+ "eval_steps_per_second": 1.5,
425
+ "step": 260
426
+ },
427
+ {
428
+ "epoch": 27.0,
429
+ "grad_norm": 1.4301420450210571,
430
+ "learning_rate": 5.555555555555556e-06,
431
+ "loss": 0.1595,
432
+ "step": 270
433
+ },
434
+ {
435
+ "epoch": 27.0,
436
+ "eval_accuracy": 0.909375,
437
+ "eval_loss": 0.3100855350494385,
438
+ "eval_runtime": 6.6671,
439
+ "eval_samples_per_second": 47.997,
440
+ "eval_steps_per_second": 1.5,
441
+ "step": 270
442
+ },
443
+ {
444
+ "epoch": 28.0,
445
+ "grad_norm": 1.7006909847259521,
446
+ "learning_rate": 3.7037037037037037e-06,
447
+ "loss": 0.1793,
448
+ "step": 280
449
+ },
450
+ {
451
+ "epoch": 28.0,
452
+ "eval_accuracy": 0.896875,
453
+ "eval_loss": 0.3230052888393402,
454
+ "eval_runtime": 6.6338,
455
+ "eval_samples_per_second": 48.238,
456
+ "eval_steps_per_second": 1.507,
457
+ "step": 280
458
+ },
459
+ {
460
+ "epoch": 29.0,
461
+ "grad_norm": 2.8452911376953125,
462
+ "learning_rate": 1.8518518518518519e-06,
463
+ "loss": 0.1596,
464
+ "step": 290
465
+ },
466
+ {
467
+ "epoch": 29.0,
468
+ "eval_accuracy": 0.9,
469
+ "eval_loss": 0.32675567269325256,
470
+ "eval_runtime": 6.6591,
471
+ "eval_samples_per_second": 48.054,
472
+ "eval_steps_per_second": 1.502,
473
+ "step": 290
474
+ },
475
+ {
476
+ "epoch": 30.0,
477
+ "grad_norm": 2.136308431625366,
478
+ "learning_rate": 0.0,
479
+ "loss": 0.169,
480
+ "step": 300
481
+ },
482
+ {
483
+ "epoch": 30.0,
484
+ "eval_accuracy": 0.896875,
485
+ "eval_loss": 0.3321378529071808,
486
+ "eval_runtime": 6.6284,
487
+ "eval_samples_per_second": 48.277,
488
+ "eval_steps_per_second": 1.509,
489
+ "step": 300
490
  },
491
  {
492
+ "epoch": 30.0,
493
+ "step": 300,
494
+ "total_flos": 1.0519143604184678e+19,
495
+ "train_loss": 0.31787962436676026,
496
+ "train_runtime": 2532.1955,
497
+ "train_samples_per_second": 15.165,
498
+ "train_steps_per_second": 0.118
499
  }
500
  ],
501
  "logging_steps": 10,
502
+ "max_steps": 300,
503
  "num_input_tokens_seen": 0,
504
+ "num_train_epochs": 30,
505
  "save_steps": 500,
506
  "stateful_callbacks": {
507
  "TrainerControl": {
 
515
  "attributes": {}
516
  }
517
  },
518
+ "total_flos": 1.0519143604184678e+19,
519
  "train_batch_size": 32,
520
  "trial_name": null,
521
  "trial_params": null