dima806 committed on
Commit
e862c67
·
verified ·
1 Parent(s): fdf7b20

Upload folder using huggingface_hub

Browse files
checkpoint-33509/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dima806/man_woman_face_image_detection",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "man",
13
+ "1": "woman"
14
+ },
15
+ "image_size": 224,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "man": 0,
20
+ "woman": 1
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "model_type": "vit",
24
+ "num_attention_heads": 12,
25
+ "num_channels": 3,
26
+ "num_hidden_layers": 12,
27
+ "patch_size": 16,
28
+ "problem_type": "single_label_classification",
29
+ "qkv_bias": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.46.0"
32
+ }
checkpoint-33509/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a5f09dfa56d4b30ecd35e061c7dea80d5ac10f60fd639de2654825a5b9aee8
3
+ size 343223968
checkpoint-33509/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dcdfadd0725bcea918d22e30d48486323af4a19b788364f0c11f2cb88247dfd
3
+ size 686568453
checkpoint-33509/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-33509/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aeb28d1bcaca86640bcd07b93d266c4181779c48a9017079da978420880b460
3
+ size 14575
checkpoint-33509/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4591d00fc2ad5cc3c2058fe536ee73457f453a7b5d794896973192ecc627acff
3
+ size 627
checkpoint-33509/trainer_state.json ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.042002853006124496,
3
+ "best_model_checkpoint": "man_woman_face_image_detection/checkpoint-33509",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 33509,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10444955086693128,
13
+ "grad_norm": 2.75286865234375,
14
+ "learning_rate": 9.865507038464987e-07,
15
+ "loss": 0.0606,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.20889910173386256,
20
+ "grad_norm": 0.4504856765270233,
21
+ "learning_rate": 9.716070414537195e-07,
22
+ "loss": 0.0604,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.3133486526007938,
27
+ "grad_norm": 4.492558479309082,
28
+ "learning_rate": 9.566633790609401e-07,
29
+ "loss": 0.0553,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.4177982034677251,
34
+ "grad_norm": 0.11193133890628815,
35
+ "learning_rate": 9.417197166681609e-07,
36
+ "loss": 0.0592,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.5222477543346563,
41
+ "grad_norm": 3.955646514892578,
42
+ "learning_rate": 9.267760542753818e-07,
43
+ "loss": 0.0582,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.6266973052015876,
48
+ "grad_norm": 0.09266256541013718,
49
+ "learning_rate": 9.118323918826025e-07,
50
+ "loss": 0.0599,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.7311468560685189,
55
+ "grad_norm": 0.46259480714797974,
56
+ "learning_rate": 8.968887294898233e-07,
57
+ "loss": 0.0649,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.8355964069354502,
62
+ "grad_norm": 0.03443637862801552,
63
+ "learning_rate": 8.819450670970441e-07,
64
+ "loss": 0.0566,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.9400459578023814,
69
+ "grad_norm": 2.622413158416748,
70
+ "learning_rate": 8.670014047042648e-07,
71
+ "loss": 0.0582,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "eval_accuracy": 0.986144295170577,
77
+ "eval_loss": 0.04410167410969734,
78
+ "eval_model_preparation_time": 0.0053,
79
+ "eval_runtime": 1053.7087,
80
+ "eval_samples_per_second": 96.919,
81
+ "eval_steps_per_second": 12.115,
82
+ "step": 4787
83
+ },
84
+ {
85
+ "epoch": 1.0444955086693126,
86
+ "grad_norm": 1.0063446760177612,
87
+ "learning_rate": 8.520577423114857e-07,
88
+ "loss": 0.0626,
89
+ "step": 5000
90
+ },
91
+ {
92
+ "epoch": 1.148945059536244,
93
+ "grad_norm": 3.5057384967803955,
94
+ "learning_rate": 8.371140799187064e-07,
95
+ "loss": 0.0584,
96
+ "step": 5500
97
+ },
98
+ {
99
+ "epoch": 1.2533946104031752,
100
+ "grad_norm": 0.10573850572109222,
101
+ "learning_rate": 8.221704175259272e-07,
102
+ "loss": 0.0576,
103
+ "step": 6000
104
+ },
105
+ {
106
+ "epoch": 1.3578441612701067,
107
+ "grad_norm": 7.020801067352295,
108
+ "learning_rate": 8.07226755133148e-07,
109
+ "loss": 0.0543,
110
+ "step": 6500
111
+ },
112
+ {
113
+ "epoch": 1.4622937121370378,
114
+ "grad_norm": 9.971087455749512,
115
+ "learning_rate": 7.922830927403687e-07,
116
+ "loss": 0.0517,
117
+ "step": 7000
118
+ },
119
+ {
120
+ "epoch": 1.566743263003969,
121
+ "grad_norm": 3.649143695831299,
122
+ "learning_rate": 7.773394303475895e-07,
123
+ "loss": 0.0542,
124
+ "step": 7500
125
+ },
126
+ {
127
+ "epoch": 1.6711928138709005,
128
+ "grad_norm": 0.7077023983001709,
129
+ "learning_rate": 7.623957679548103e-07,
130
+ "loss": 0.0576,
131
+ "step": 8000
132
+ },
133
+ {
134
+ "epoch": 1.7756423647378317,
135
+ "grad_norm": 9.549636840820312,
136
+ "learning_rate": 7.474521055620311e-07,
137
+ "loss": 0.0558,
138
+ "step": 8500
139
+ },
140
+ {
141
+ "epoch": 1.8800919156047629,
142
+ "grad_norm": 0.12657281756401062,
143
+ "learning_rate": 7.325084431692519e-07,
144
+ "loss": 0.0501,
145
+ "step": 9000
146
+ },
147
+ {
148
+ "epoch": 1.9845414664716943,
149
+ "grad_norm": 4.699319362640381,
150
+ "learning_rate": 7.175647807764726e-07,
151
+ "loss": 0.0545,
152
+ "step": 9500
153
+ },
154
+ {
155
+ "epoch": 2.0,
156
+ "eval_accuracy": 0.9864968078022796,
157
+ "eval_loss": 0.042747873812913895,
158
+ "eval_model_preparation_time": 0.0053,
159
+ "eval_runtime": 1036.1097,
160
+ "eval_samples_per_second": 98.565,
161
+ "eval_steps_per_second": 12.321,
162
+ "step": 9574
163
+ },
164
+ {
165
+ "epoch": 2.0889910173386252,
166
+ "grad_norm": 1.3633027076721191,
167
+ "learning_rate": 7.026211183836934e-07,
168
+ "loss": 0.0485,
169
+ "step": 10000
170
+ },
171
+ {
172
+ "epoch": 2.1934405682055567,
173
+ "grad_norm": 0.850628137588501,
174
+ "learning_rate": 6.876774559909142e-07,
175
+ "loss": 0.0607,
176
+ "step": 10500
177
+ },
178
+ {
179
+ "epoch": 2.297890119072488,
180
+ "grad_norm": 4.338198661804199,
181
+ "learning_rate": 6.72733793598135e-07,
182
+ "loss": 0.0541,
183
+ "step": 11000
184
+ },
185
+ {
186
+ "epoch": 2.402339669939419,
187
+ "grad_norm": 0.07052299380302429,
188
+ "learning_rate": 6.577901312053558e-07,
189
+ "loss": 0.0505,
190
+ "step": 11500
191
+ },
192
+ {
193
+ "epoch": 2.5067892208063505,
194
+ "grad_norm": 4.150068283081055,
195
+ "learning_rate": 6.428464688125765e-07,
196
+ "loss": 0.0546,
197
+ "step": 12000
198
+ },
199
+ {
200
+ "epoch": 2.611238771673282,
201
+ "grad_norm": 0.10252227634191513,
202
+ "learning_rate": 6.279028064197973e-07,
203
+ "loss": 0.0498,
204
+ "step": 12500
205
+ },
206
+ {
207
+ "epoch": 2.7156883225402133,
208
+ "grad_norm": 0.12012413889169693,
209
+ "learning_rate": 6.12959144027018e-07,
210
+ "loss": 0.0459,
211
+ "step": 13000
212
+ },
213
+ {
214
+ "epoch": 2.8201378734071443,
215
+ "grad_norm": 1.6249473094940186,
216
+ "learning_rate": 5.980154816342389e-07,
217
+ "loss": 0.0469,
218
+ "step": 13500
219
+ },
220
+ {
221
+ "epoch": 2.9245874242740757,
222
+ "grad_norm": 2.401695489883423,
223
+ "learning_rate": 5.830718192414597e-07,
224
+ "loss": 0.051,
225
+ "step": 14000
226
+ },
227
+ {
228
+ "epoch": 3.0,
229
+ "eval_accuracy": 0.9866926481532254,
230
+ "eval_loss": 0.042268697172403336,
231
+ "eval_model_preparation_time": 0.0053,
232
+ "eval_runtime": 1031.115,
233
+ "eval_samples_per_second": 99.042,
234
+ "eval_steps_per_second": 12.381,
235
+ "step": 14361
236
+ },
237
+ {
238
+ "epoch": 3.0290369751410067,
239
+ "grad_norm": 1.793472170829773,
240
+ "learning_rate": 5.681281568486804e-07,
241
+ "loss": 0.0598,
242
+ "step": 14500
243
+ },
244
+ {
245
+ "epoch": 3.133486526007938,
246
+ "grad_norm": 7.33253812789917,
247
+ "learning_rate": 5.531844944559012e-07,
248
+ "loss": 0.0477,
249
+ "step": 15000
250
+ },
251
+ {
252
+ "epoch": 3.2379360768748695,
253
+ "grad_norm": 0.11843205243349075,
254
+ "learning_rate": 5.382408320631219e-07,
255
+ "loss": 0.0527,
256
+ "step": 15500
257
+ },
258
+ {
259
+ "epoch": 3.342385627741801,
260
+ "grad_norm": 1.8657333850860596,
261
+ "learning_rate": 5.232971696703428e-07,
262
+ "loss": 0.054,
263
+ "step": 16000
264
+ },
265
+ {
266
+ "epoch": 3.446835178608732,
267
+ "grad_norm": 4.304357528686523,
268
+ "learning_rate": 5.083535072775636e-07,
269
+ "loss": 0.0521,
270
+ "step": 16500
271
+ },
272
+ {
273
+ "epoch": 3.5512847294756633,
274
+ "grad_norm": 0.2691272795200348,
275
+ "learning_rate": 4.934098448847843e-07,
276
+ "loss": 0.0516,
277
+ "step": 17000
278
+ },
279
+ {
280
+ "epoch": 3.6557342803425943,
281
+ "grad_norm": 6.170676231384277,
282
+ "learning_rate": 4.784661824920051e-07,
283
+ "loss": 0.0463,
284
+ "step": 17500
285
+ },
286
+ {
287
+ "epoch": 3.7601838312095257,
288
+ "grad_norm": 3.405898094177246,
289
+ "learning_rate": 4.635225200992259e-07,
290
+ "loss": 0.0519,
291
+ "step": 18000
292
+ },
293
+ {
294
+ "epoch": 3.864633382076457,
295
+ "grad_norm": 2.52126145362854,
296
+ "learning_rate": 4.4857885770644665e-07,
297
+ "loss": 0.0494,
298
+ "step": 18500
299
+ },
300
+ {
301
+ "epoch": 3.9690829329433885,
302
+ "grad_norm": 11.166853904724121,
303
+ "learning_rate": 4.336351953136674e-07,
304
+ "loss": 0.0457,
305
+ "step": 19000
306
+ },
307
+ {
308
+ "epoch": 4.0,
309
+ "eval_accuracy": 0.9867220242058674,
310
+ "eval_loss": 0.043168142437934875,
311
+ "eval_model_preparation_time": 0.0053,
312
+ "eval_runtime": 1034.639,
313
+ "eval_samples_per_second": 98.705,
314
+ "eval_steps_per_second": 12.339,
315
+ "step": 19148
316
+ },
317
+ {
318
+ "epoch": 4.07353248381032,
319
+ "grad_norm": 0.24023988842964172,
320
+ "learning_rate": 4.1869153292088824e-07,
321
+ "loss": 0.0495,
322
+ "step": 19500
323
+ },
324
+ {
325
+ "epoch": 4.1779820346772505,
326
+ "grad_norm": 7.678467273712158,
327
+ "learning_rate": 4.03747870528109e-07,
328
+ "loss": 0.0525,
329
+ "step": 20000
330
+ },
331
+ {
332
+ "epoch": 4.282431585544182,
333
+ "grad_norm": 0.244981586933136,
334
+ "learning_rate": 3.8880420813532977e-07,
335
+ "loss": 0.0507,
336
+ "step": 20500
337
+ },
338
+ {
339
+ "epoch": 4.386881136411113,
340
+ "grad_norm": 0.22300060093402863,
341
+ "learning_rate": 3.7386054574255054e-07,
342
+ "loss": 0.0509,
343
+ "step": 21000
344
+ },
345
+ {
346
+ "epoch": 4.491330687278045,
347
+ "grad_norm": 5.288808345794678,
348
+ "learning_rate": 3.589168833497713e-07,
349
+ "loss": 0.0474,
350
+ "step": 21500
351
+ },
352
+ {
353
+ "epoch": 4.595780238144976,
354
+ "grad_norm": 1.9191228151321411,
355
+ "learning_rate": 3.4397322095699213e-07,
356
+ "loss": 0.0488,
357
+ "step": 22000
358
+ },
359
+ {
360
+ "epoch": 4.700229789011908,
361
+ "grad_norm": 5.287907123565674,
362
+ "learning_rate": 3.290295585642129e-07,
363
+ "loss": 0.0459,
364
+ "step": 22500
365
+ },
366
+ {
367
+ "epoch": 4.804679339878838,
368
+ "grad_norm": 0.1149277314543724,
369
+ "learning_rate": 3.1408589617143366e-07,
370
+ "loss": 0.0447,
371
+ "step": 23000
372
+ },
373
+ {
374
+ "epoch": 4.9091288907457695,
375
+ "grad_norm": 3.53507137298584,
376
+ "learning_rate": 2.9914223377865443e-07,
377
+ "loss": 0.0523,
378
+ "step": 23500
379
+ },
380
+ {
381
+ "epoch": 5.0,
382
+ "eval_accuracy": 0.9870353687673808,
383
+ "eval_loss": 0.0422465056180954,
384
+ "eval_model_preparation_time": 0.0053,
385
+ "eval_runtime": 991.7535,
386
+ "eval_samples_per_second": 102.973,
387
+ "eval_steps_per_second": 12.872,
388
+ "step": 23935
389
+ },
390
+ {
391
+ "epoch": 5.013578441612701,
392
+ "grad_norm": 0.036819346249103546,
393
+ "learning_rate": 2.841985713858752e-07,
394
+ "loss": 0.0495,
395
+ "step": 24000
396
+ },
397
+ {
398
+ "epoch": 5.118027992479632,
399
+ "grad_norm": 3.238111972808838,
400
+ "learning_rate": 2.69254908993096e-07,
401
+ "loss": 0.0507,
402
+ "step": 24500
403
+ },
404
+ {
405
+ "epoch": 5.222477543346564,
406
+ "grad_norm": 0.4373936951160431,
407
+ "learning_rate": 2.543112466003168e-07,
408
+ "loss": 0.0499,
409
+ "step": 25000
410
+ },
411
+ {
412
+ "epoch": 5.326927094213495,
413
+ "grad_norm": 0.06157074496150017,
414
+ "learning_rate": 2.393675842075376e-07,
415
+ "loss": 0.0481,
416
+ "step": 25500
417
+ },
418
+ {
419
+ "epoch": 5.431376645080427,
420
+ "grad_norm": 0.5681213140487671,
421
+ "learning_rate": 2.2442392181475835e-07,
422
+ "loss": 0.048,
423
+ "step": 26000
424
+ },
425
+ {
426
+ "epoch": 5.535826195947357,
427
+ "grad_norm": 0.2663944661617279,
428
+ "learning_rate": 2.0948025942197914e-07,
429
+ "loss": 0.0498,
430
+ "step": 26500
431
+ },
432
+ {
433
+ "epoch": 5.640275746814289,
434
+ "grad_norm": 0.38863956928253174,
435
+ "learning_rate": 1.945365970291999e-07,
436
+ "loss": 0.0455,
437
+ "step": 27000
438
+ },
439
+ {
440
+ "epoch": 5.74472529768122,
441
+ "grad_norm": 1.1037393808364868,
442
+ "learning_rate": 1.795929346364207e-07,
443
+ "loss": 0.0465,
444
+ "step": 27500
445
+ },
446
+ {
447
+ "epoch": 5.849174848548151,
448
+ "grad_norm": 0.06277265399694443,
449
+ "learning_rate": 1.6464927224364147e-07,
450
+ "loss": 0.0443,
451
+ "step": 28000
452
+ },
453
+ {
454
+ "epoch": 5.953624399415083,
455
+ "grad_norm": 0.3379976153373718,
456
+ "learning_rate": 1.4970560985086224e-07,
457
+ "loss": 0.0457,
458
+ "step": 28500
459
+ },
460
+ {
461
+ "epoch": 6.0,
462
+ "eval_accuracy": 0.9869570326270025,
463
+ "eval_loss": 0.04241248220205307,
464
+ "eval_model_preparation_time": 0.0053,
465
+ "eval_runtime": 1029.9456,
466
+ "eval_samples_per_second": 99.155,
467
+ "eval_steps_per_second": 12.395,
468
+ "step": 28722
469
+ },
470
+ {
471
+ "epoch": 6.058073950282013,
472
+ "grad_norm": 4.613526821136475,
473
+ "learning_rate": 1.3476194745808303e-07,
474
+ "loss": 0.0428,
475
+ "step": 29000
476
+ },
477
+ {
478
+ "epoch": 6.162523501148945,
479
+ "grad_norm": 0.2739298641681671,
480
+ "learning_rate": 1.198182850653038e-07,
481
+ "loss": 0.0474,
482
+ "step": 29500
483
+ },
484
+ {
485
+ "epoch": 6.266973052015876,
486
+ "grad_norm": 0.4624876379966736,
487
+ "learning_rate": 1.0487462267252457e-07,
488
+ "loss": 0.0526,
489
+ "step": 30000
490
+ },
491
+ {
492
+ "epoch": 6.371422602882808,
493
+ "grad_norm": 1.4040108919143677,
494
+ "learning_rate": 8.993096027974535e-08,
495
+ "loss": 0.0449,
496
+ "step": 30500
497
+ },
498
+ {
499
+ "epoch": 6.475872153749739,
500
+ "grad_norm": 0.09033941477537155,
501
+ "learning_rate": 7.498729788696613e-08,
502
+ "loss": 0.0515,
503
+ "step": 31000
504
+ },
505
+ {
506
+ "epoch": 6.58032170461667,
507
+ "grad_norm": 0.250262588262558,
508
+ "learning_rate": 6.004363549418691e-08,
509
+ "loss": 0.0467,
510
+ "step": 31500
511
+ },
512
+ {
513
+ "epoch": 6.684771255483602,
514
+ "grad_norm": 1.7938841581344604,
515
+ "learning_rate": 4.509997310140769e-08,
516
+ "loss": 0.0545,
517
+ "step": 32000
518
+ },
519
+ {
520
+ "epoch": 6.789220806350532,
521
+ "grad_norm": 0.6938722729682922,
522
+ "learning_rate": 3.015631070862847e-08,
523
+ "loss": 0.0485,
524
+ "step": 32500
525
+ },
526
+ {
527
+ "epoch": 6.893670357217464,
528
+ "grad_norm": 0.07981687039136887,
529
+ "learning_rate": 1.5212648315849248e-08,
530
+ "loss": 0.0471,
531
+ "step": 33000
532
+ },
533
+ {
534
+ "epoch": 6.998119908084395,
535
+ "grad_norm": 1.794087290763855,
536
+ "learning_rate": 2.6898592307002603e-10,
537
+ "loss": 0.0441,
538
+ "step": 33500
539
+ },
540
+ {
541
+ "epoch": 7.0,
542
+ "eval_accuracy": 0.9871137049077592,
543
+ "eval_loss": 0.042002853006124496,
544
+ "eval_model_preparation_time": 0.0053,
545
+ "eval_runtime": 1039.9182,
546
+ "eval_samples_per_second": 98.204,
547
+ "eval_steps_per_second": 12.276,
548
+ "step": 33509
549
+ }
550
+ ],
551
+ "logging_steps": 500,
552
+ "max_steps": 33509,
553
+ "num_input_tokens_seen": 0,
554
+ "num_train_epochs": 7,
555
+ "save_steps": 500,
556
+ "stateful_callbacks": {
557
+ "TrainerControl": {
558
+ "args": {
559
+ "should_epoch_stop": false,
560
+ "should_evaluate": false,
561
+ "should_log": false,
562
+ "should_save": true,
563
+ "should_training_stop": true
564
+ },
565
+ "attributes": {}
566
+ }
567
+ },
568
+ "total_flos": 8.309373055985163e+19,
569
+ "train_batch_size": 32,
570
+ "trial_name": null,
571
+ "trial_params": null
572
+ }
checkpoint-33509/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f14dd56d897ca4c82817bd8ffc3fef58a0a0fcd162158e68e2dcc22f1ced3ee
3
+ size 4731
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76a0e858198cf9bc5c049b946990a31fbcfae443b96421d92b8311418245180c
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a5f09dfa56d4b30ecd35e061c7dea80d5ac10f60fd639de2654825a5b9aee8
3
  size 343223968
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c28c1f257c537cb703935fd86e87d6b3ad069482a37aed881f667e21331c3d9d
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f14dd56d897ca4c82817bd8ffc3fef58a0a0fcd162158e68e2dcc22f1ced3ee
3
  size 4731