nc7777 committed on
Commit
9844834
1 Parent(s): d3122a4

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224",
3
+ "architectures": [
4
+ "CustomViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.2,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.2,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "benigno",
13
+ "1": "maligno",
14
+ "2": "sospechoso"
15
+ },
16
+ "image_size": 224,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "benigno": 0,
21
+ "maligno": 1,
22
+ "sospechoso": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "model_type": "vit",
26
+ "num_attention_heads": 12,
27
+ "num_channels": 3,
28
+ "num_hidden_layers": 12,
29
+ "patch_size": 16,
30
+ "qkv_bias": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.45.2"
33
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a20104ebd7177ed7afca77493358e896bc3f93a3a7dd76098997f95c195d626c
3
+ size 343227052
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d38147a77d93ba65d84613f146533d5ad8ed94253d1aa1c91c262dd04e3d94
3
+ size 397031674
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessorFast",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abd336be5aa19dd934711acf6c792e4d4905d664d9c20eb143ff4803f81029d
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35c8ac80e2e3004740b7e835c921ab149674c56cfcd7aa1d0520db45bce1df81
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,885 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.728,
3
+ "best_model_checkpoint": "VT_15/checkpoint-7279",
4
+ "epoch": 29.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7279,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.398406374501992,
13
+ "grad_norm": 11.727704048156738,
14
+ "learning_rate": 9.867197875166003e-05,
15
+ "loss": 1.0157,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.796812749003984,
20
+ "grad_norm": 11.50269603729248,
21
+ "learning_rate": 9.734395750332006e-05,
22
+ "loss": 0.8938,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.548,
28
+ "eval_f1": 0.4882155949880584,
29
+ "eval_loss": 0.9423586130142212,
30
+ "eval_precision": 0.5733499456903712,
31
+ "eval_recall": 0.5448468191272983,
32
+ "eval_runtime": 16.5911,
33
+ "eval_samples_per_second": 30.137,
34
+ "eval_steps_per_second": 3.797,
35
+ "step": 251
36
+ },
37
+ {
38
+ "epoch": 1.1952191235059761,
39
+ "grad_norm": 10.47740650177002,
40
+ "learning_rate": 9.601593625498009e-05,
41
+ "loss": 0.8485,
42
+ "step": 300
43
+ },
44
+ {
45
+ "epoch": 1.593625498007968,
46
+ "grad_norm": 12.222431182861328,
47
+ "learning_rate": 9.468791500664011e-05,
48
+ "loss": 0.7978,
49
+ "step": 400
50
+ },
51
+ {
52
+ "epoch": 1.9920318725099602,
53
+ "grad_norm": 13.02956771850586,
54
+ "learning_rate": 9.335989375830013e-05,
55
+ "loss": 0.8093,
56
+ "step": 500
57
+ },
58
+ {
59
+ "epoch": 2.0,
60
+ "eval_accuracy": 0.61,
61
+ "eval_f1": 0.584707628587346,
62
+ "eval_loss": 0.8303987979888916,
63
+ "eval_precision": 0.6022599150943656,
64
+ "eval_recall": 0.6097290164964017,
65
+ "eval_runtime": 15.8756,
66
+ "eval_samples_per_second": 31.495,
67
+ "eval_steps_per_second": 3.968,
68
+ "step": 502
69
+ },
70
+ {
71
+ "epoch": 2.3904382470119523,
72
+ "grad_norm": 11.920299530029297,
73
+ "learning_rate": 9.203187250996016e-05,
74
+ "loss": 0.7275,
75
+ "step": 600
76
+ },
77
+ {
78
+ "epoch": 2.7888446215139444,
79
+ "grad_norm": 12.057920455932617,
80
+ "learning_rate": 9.070385126162018e-05,
81
+ "loss": 0.7444,
82
+ "step": 700
83
+ },
84
+ {
85
+ "epoch": 3.0,
86
+ "eval_accuracy": 0.578,
87
+ "eval_f1": 0.5180026990553307,
88
+ "eval_loss": 0.8966869115829468,
89
+ "eval_precision": 0.6246163183733936,
90
+ "eval_recall": 0.5751176873923239,
91
+ "eval_runtime": 16.5617,
92
+ "eval_samples_per_second": 30.19,
93
+ "eval_steps_per_second": 3.804,
94
+ "step": 753
95
+ },
96
+ {
97
+ "epoch": 3.187250996015936,
98
+ "grad_norm": 12.730193138122559,
99
+ "learning_rate": 8.937583001328021e-05,
100
+ "loss": 0.7091,
101
+ "step": 800
102
+ },
103
+ {
104
+ "epoch": 3.585657370517928,
105
+ "grad_norm": 11.721458435058594,
106
+ "learning_rate": 8.804780876494024e-05,
107
+ "loss": 0.6502,
108
+ "step": 900
109
+ },
110
+ {
111
+ "epoch": 3.9840637450199203,
112
+ "grad_norm": 11.902241706848145,
113
+ "learning_rate": 8.671978751660027e-05,
114
+ "loss": 0.6391,
115
+ "step": 1000
116
+ },
117
+ {
118
+ "epoch": 4.0,
119
+ "eval_accuracy": 0.624,
120
+ "eval_f1": 0.6176260916415671,
121
+ "eval_loss": 0.8131240010261536,
122
+ "eval_precision": 0.6213088498802785,
123
+ "eval_recall": 0.6225291409540601,
124
+ "eval_runtime": 16.9186,
125
+ "eval_samples_per_second": 29.553,
126
+ "eval_steps_per_second": 3.724,
127
+ "step": 1004
128
+ },
129
+ {
130
+ "epoch": 4.382470119521912,
131
+ "grad_norm": 11.794739723205566,
132
+ "learning_rate": 8.539176626826029e-05,
133
+ "loss": 0.5683,
134
+ "step": 1100
135
+ },
136
+ {
137
+ "epoch": 4.780876494023905,
138
+ "grad_norm": 11.683808326721191,
139
+ "learning_rate": 8.406374501992032e-05,
140
+ "loss": 0.5691,
141
+ "step": 1200
142
+ },
143
+ {
144
+ "epoch": 5.0,
145
+ "eval_accuracy": 0.622,
146
+ "eval_f1": 0.5716700610476999,
147
+ "eval_loss": 0.8928351998329163,
148
+ "eval_precision": 0.6410496659546192,
149
+ "eval_recall": 0.6199959090306707,
150
+ "eval_runtime": 16.6354,
151
+ "eval_samples_per_second": 30.056,
152
+ "eval_steps_per_second": 3.787,
153
+ "step": 1255
154
+ },
155
+ {
156
+ "epoch": 5.179282868525896,
157
+ "grad_norm": 13.705164909362793,
158
+ "learning_rate": 8.273572377158035e-05,
159
+ "loss": 0.5855,
160
+ "step": 1300
161
+ },
162
+ {
163
+ "epoch": 5.577689243027889,
164
+ "grad_norm": 11.817400932312012,
165
+ "learning_rate": 8.140770252324038e-05,
166
+ "loss": 0.513,
167
+ "step": 1400
168
+ },
169
+ {
170
+ "epoch": 5.9760956175298805,
171
+ "grad_norm": 11.755231857299805,
172
+ "learning_rate": 8.00796812749004e-05,
173
+ "loss": 0.5009,
174
+ "step": 1500
175
+ },
176
+ {
177
+ "epoch": 6.0,
178
+ "eval_accuracy": 0.656,
179
+ "eval_f1": 0.6379970132206839,
180
+ "eval_loss": 0.8215978741645813,
181
+ "eval_precision": 0.6485419139105967,
182
+ "eval_recall": 0.6545787165880164,
183
+ "eval_runtime": 17.7464,
184
+ "eval_samples_per_second": 28.175,
185
+ "eval_steps_per_second": 3.55,
186
+ "step": 1506
187
+ },
188
+ {
189
+ "epoch": 6.374501992031872,
190
+ "grad_norm": 10.913691520690918,
191
+ "learning_rate": 7.875166002656043e-05,
192
+ "loss": 0.4488,
193
+ "step": 1600
194
+ },
195
+ {
196
+ "epoch": 6.772908366533865,
197
+ "grad_norm": 10.615025520324707,
198
+ "learning_rate": 7.742363877822046e-05,
199
+ "loss": 0.4855,
200
+ "step": 1700
201
+ },
202
+ {
203
+ "epoch": 7.0,
204
+ "eval_accuracy": 0.654,
205
+ "eval_f1": 0.6439337408377656,
206
+ "eval_loss": 0.8311923742294312,
207
+ "eval_precision": 0.6495670995670996,
208
+ "eval_recall": 0.6520354012895196,
209
+ "eval_runtime": 17.519,
210
+ "eval_samples_per_second": 28.54,
211
+ "eval_steps_per_second": 3.596,
212
+ "step": 1757
213
+ },
214
+ {
215
+ "epoch": 7.171314741035856,
216
+ "grad_norm": 12.026023864746094,
217
+ "learning_rate": 7.609561752988048e-05,
218
+ "loss": 0.4177,
219
+ "step": 1800
220
+ },
221
+ {
222
+ "epoch": 7.569721115537849,
223
+ "grad_norm": 10.010376930236816,
224
+ "learning_rate": 7.476759628154051e-05,
225
+ "loss": 0.409,
226
+ "step": 1900
227
+ },
228
+ {
229
+ "epoch": 7.968127490039841,
230
+ "grad_norm": 12.596341133117676,
231
+ "learning_rate": 7.343957503320054e-05,
232
+ "loss": 0.39,
233
+ "step": 2000
234
+ },
235
+ {
236
+ "epoch": 8.0,
237
+ "eval_accuracy": 0.682,
238
+ "eval_f1": 0.6538606492353024,
239
+ "eval_loss": 0.9214051365852356,
240
+ "eval_precision": 0.7101278814728985,
241
+ "eval_recall": 0.6804800262743945,
242
+ "eval_runtime": 17.2651,
243
+ "eval_samples_per_second": 28.96,
244
+ "eval_steps_per_second": 3.649,
245
+ "step": 2008
246
+ },
247
+ {
248
+ "epoch": 8.366533864541832,
249
+ "grad_norm": 14.169309616088867,
250
+ "learning_rate": 7.211155378486057e-05,
251
+ "loss": 0.3646,
252
+ "step": 2100
253
+ },
254
+ {
255
+ "epoch": 8.764940239043824,
256
+ "grad_norm": 12.020890235900879,
257
+ "learning_rate": 7.07835325365206e-05,
258
+ "loss": 0.3708,
259
+ "step": 2200
260
+ },
261
+ {
262
+ "epoch": 9.0,
263
+ "eval_accuracy": 0.598,
264
+ "eval_f1": 0.5725541685903895,
265
+ "eval_loss": 1.0422428846359253,
266
+ "eval_precision": 0.6090800979488745,
267
+ "eval_recall": 0.5948744763847355,
268
+ "eval_runtime": 17.2984,
269
+ "eval_samples_per_second": 28.904,
270
+ "eval_steps_per_second": 3.642,
271
+ "step": 2259
272
+ },
273
+ {
274
+ "epoch": 9.163346613545817,
275
+ "grad_norm": 10.583425521850586,
276
+ "learning_rate": 6.945551128818062e-05,
277
+ "loss": 0.3533,
278
+ "step": 2300
279
+ },
280
+ {
281
+ "epoch": 9.56175298804781,
282
+ "grad_norm": 13.7178316116333,
283
+ "learning_rate": 6.812749003984064e-05,
284
+ "loss": 0.2984,
285
+ "step": 2400
286
+ },
287
+ {
288
+ "epoch": 9.9601593625498,
289
+ "grad_norm": 10.42063045501709,
290
+ "learning_rate": 6.679946879150066e-05,
291
+ "loss": 0.3328,
292
+ "step": 2500
293
+ },
294
+ {
295
+ "epoch": 10.0,
296
+ "eval_accuracy": 0.718,
297
+ "eval_f1": 0.715429962270956,
298
+ "eval_loss": 0.7483692765235901,
299
+ "eval_precision": 0.7195966559320596,
300
+ "eval_recall": 0.7178075285359515,
301
+ "eval_runtime": 17.492,
302
+ "eval_samples_per_second": 28.584,
303
+ "eval_steps_per_second": 3.602,
304
+ "step": 2510
305
+ },
306
+ {
307
+ "epoch": 10.358565737051793,
308
+ "grad_norm": 10.752534866333008,
309
+ "learning_rate": 6.547144754316069e-05,
310
+ "loss": 0.2561,
311
+ "step": 2600
312
+ },
313
+ {
314
+ "epoch": 10.756972111553784,
315
+ "grad_norm": 10.988365173339844,
316
+ "learning_rate": 6.414342629482072e-05,
317
+ "loss": 0.3092,
318
+ "step": 2700
319
+ },
320
+ {
321
+ "epoch": 11.0,
322
+ "eval_accuracy": 0.72,
323
+ "eval_f1": 0.7160176967190494,
324
+ "eval_loss": 0.8250208497047424,
325
+ "eval_precision": 0.7184132303947973,
326
+ "eval_recall": 0.7186398391269527,
327
+ "eval_runtime": 17.2463,
328
+ "eval_samples_per_second": 28.992,
329
+ "eval_steps_per_second": 3.653,
330
+ "step": 2761
331
+ },
332
+ {
333
+ "epoch": 11.155378486055778,
334
+ "grad_norm": 10.967025756835938,
335
+ "learning_rate": 6.281540504648075e-05,
336
+ "loss": 0.28,
337
+ "step": 2800
338
+ },
339
+ {
340
+ "epoch": 11.55378486055777,
341
+ "grad_norm": 11.933313369750977,
342
+ "learning_rate": 6.148738379814077e-05,
343
+ "loss": 0.2747,
344
+ "step": 2900
345
+ },
346
+ {
347
+ "epoch": 11.952191235059761,
348
+ "grad_norm": 12.90857219696045,
349
+ "learning_rate": 6.01593625498008e-05,
350
+ "loss": 0.281,
351
+ "step": 3000
352
+ },
353
+ {
354
+ "epoch": 12.0,
355
+ "eval_accuracy": 0.694,
356
+ "eval_f1": 0.6781251589992235,
357
+ "eval_loss": 0.9806899428367615,
358
+ "eval_precision": 0.6976797604396068,
359
+ "eval_recall": 0.6921398650556313,
360
+ "eval_runtime": 17.2981,
361
+ "eval_samples_per_second": 28.905,
362
+ "eval_steps_per_second": 3.642,
363
+ "step": 3012
364
+ },
365
+ {
366
+ "epoch": 12.350597609561753,
367
+ "grad_norm": 11.363082885742188,
368
+ "learning_rate": 5.883134130146083e-05,
369
+ "loss": 0.2596,
370
+ "step": 3100
371
+ },
372
+ {
373
+ "epoch": 12.749003984063744,
374
+ "grad_norm": 12.736093521118164,
375
+ "learning_rate": 5.7503320053120855e-05,
376
+ "loss": 0.2162,
377
+ "step": 3200
378
+ },
379
+ {
380
+ "epoch": 13.0,
381
+ "eval_accuracy": 0.708,
382
+ "eval_f1": 0.7021505447248022,
383
+ "eval_loss": 0.9850034117698669,
384
+ "eval_precision": 0.7101112865680695,
385
+ "eval_recall": 0.7063781669000248,
386
+ "eval_runtime": 17.6848,
387
+ "eval_samples_per_second": 28.273,
388
+ "eval_steps_per_second": 3.562,
389
+ "step": 3263
390
+ },
391
+ {
392
+ "epoch": 13.147410358565738,
393
+ "grad_norm": 11.957535743713379,
394
+ "learning_rate": 5.6175298804780876e-05,
395
+ "loss": 0.2271,
396
+ "step": 3300
397
+ },
398
+ {
399
+ "epoch": 13.54581673306773,
400
+ "grad_norm": 13.91019058227539,
401
+ "learning_rate": 5.48472775564409e-05,
402
+ "loss": 0.2284,
403
+ "step": 3400
404
+ },
405
+ {
406
+ "epoch": 13.944223107569721,
407
+ "grad_norm": 10.606439590454102,
408
+ "learning_rate": 5.351925630810093e-05,
409
+ "loss": 0.2352,
410
+ "step": 3500
411
+ },
412
+ {
413
+ "epoch": 14.0,
414
+ "eval_accuracy": 0.698,
415
+ "eval_f1": 0.6875106838790609,
416
+ "eval_loss": 0.9122900366783142,
417
+ "eval_precision": 0.6937544840437923,
418
+ "eval_recall": 0.6964621758194903,
419
+ "eval_runtime": 17.3918,
420
+ "eval_samples_per_second": 28.749,
421
+ "eval_steps_per_second": 3.622,
422
+ "step": 3514
423
+ },
424
+ {
425
+ "epoch": 14.342629482071713,
426
+ "grad_norm": 10.729408264160156,
427
+ "learning_rate": 5.219123505976096e-05,
428
+ "loss": 0.2339,
429
+ "step": 3600
430
+ },
431
+ {
432
+ "epoch": 14.741035856573705,
433
+ "grad_norm": 10.717667579650879,
434
+ "learning_rate": 5.0863213811420985e-05,
435
+ "loss": 0.1947,
436
+ "step": 3700
437
+ },
438
+ {
439
+ "epoch": 15.0,
440
+ "eval_accuracy": 0.694,
441
+ "eval_f1": 0.6847697638967624,
442
+ "eval_loss": 1.0269464254379272,
443
+ "eval_precision": 0.6984319398216817,
444
+ "eval_recall": 0.6918425495381815,
445
+ "eval_runtime": 17.3612,
446
+ "eval_samples_per_second": 28.8,
447
+ "eval_steps_per_second": 3.629,
448
+ "step": 3765
449
+ },
450
+ {
451
+ "epoch": 15.139442231075698,
452
+ "grad_norm": 15.637863159179688,
453
+ "learning_rate": 4.953519256308101e-05,
454
+ "loss": 0.2033,
455
+ "step": 3800
456
+ },
457
+ {
458
+ "epoch": 15.53784860557769,
459
+ "grad_norm": 11.52315902709961,
460
+ "learning_rate": 4.820717131474104e-05,
461
+ "loss": 0.2073,
462
+ "step": 3900
463
+ },
464
+ {
465
+ "epoch": 15.936254980079681,
466
+ "grad_norm": 10.585031509399414,
467
+ "learning_rate": 4.687915006640107e-05,
468
+ "loss": 0.1902,
469
+ "step": 4000
470
+ },
471
+ {
472
+ "epoch": 16.0,
473
+ "eval_accuracy": 0.702,
474
+ "eval_f1": 0.6936988146223305,
475
+ "eval_loss": 1.0092582702636719,
476
+ "eval_precision": 0.704450992084371,
477
+ "eval_recall": 0.6998746780522377,
478
+ "eval_runtime": 17.2437,
479
+ "eval_samples_per_second": 28.996,
480
+ "eval_steps_per_second": 3.654,
481
+ "step": 4016
482
+ },
483
+ {
484
+ "epoch": 16.334661354581673,
485
+ "grad_norm": 13.553791046142578,
486
+ "learning_rate": 4.555112881806109e-05,
487
+ "loss": 0.1961,
488
+ "step": 4100
489
+ },
490
+ {
491
+ "epoch": 16.733067729083665,
492
+ "grad_norm": 10.33850383758545,
493
+ "learning_rate": 4.4223107569721116e-05,
494
+ "loss": 0.1912,
495
+ "step": 4200
496
+ },
497
+ {
498
+ "epoch": 17.0,
499
+ "eval_accuracy": 0.712,
500
+ "eval_f1": 0.7037256290589013,
501
+ "eval_loss": 0.9451501369476318,
502
+ "eval_precision": 0.7113280708081392,
503
+ "eval_recall": 0.710350440499444,
504
+ "eval_runtime": 17.5777,
505
+ "eval_samples_per_second": 28.445,
506
+ "eval_steps_per_second": 3.584,
507
+ "step": 4267
508
+ },
509
+ {
510
+ "epoch": 17.131474103585656,
511
+ "grad_norm": 10.550813674926758,
512
+ "learning_rate": 4.289508632138114e-05,
513
+ "loss": 0.1724,
514
+ "step": 4300
515
+ },
516
+ {
517
+ "epoch": 17.529880478087648,
518
+ "grad_norm": 11.269770622253418,
519
+ "learning_rate": 4.156706507304117e-05,
520
+ "loss": 0.166,
521
+ "step": 4400
522
+ },
523
+ {
524
+ "epoch": 17.92828685258964,
525
+ "grad_norm": 11.22702693939209,
526
+ "learning_rate": 4.02390438247012e-05,
527
+ "loss": 0.1626,
528
+ "step": 4500
529
+ },
530
+ {
531
+ "epoch": 18.0,
532
+ "eval_accuracy": 0.71,
533
+ "eval_f1": 0.6965991557822268,
534
+ "eval_loss": 1.0229520797729492,
535
+ "eval_precision": 0.7119117791531585,
536
+ "eval_recall": 0.7080606500607883,
537
+ "eval_runtime": 17.3563,
538
+ "eval_samples_per_second": 28.808,
539
+ "eval_steps_per_second": 3.63,
540
+ "step": 4518
541
+ },
542
+ {
543
+ "epoch": 18.326693227091635,
544
+ "grad_norm": 11.750710487365723,
545
+ "learning_rate": 3.8911022576361225e-05,
546
+ "loss": 0.146,
547
+ "step": 4600
548
+ },
549
+ {
550
+ "epoch": 18.725099601593627,
551
+ "grad_norm": 10.37628173828125,
552
+ "learning_rate": 3.758300132802125e-05,
553
+ "loss": 0.1524,
554
+ "step": 4700
555
+ },
556
+ {
557
+ "epoch": 19.0,
558
+ "eval_accuracy": 0.716,
559
+ "eval_f1": 0.7120852228214192,
560
+ "eval_loss": 0.9977978467941284,
561
+ "eval_precision": 0.7210033022533023,
562
+ "eval_recall": 0.7144085668354911,
563
+ "eval_runtime": 17.5249,
564
+ "eval_samples_per_second": 28.531,
565
+ "eval_steps_per_second": 3.595,
566
+ "step": 4769
567
+ },
568
+ {
569
+ "epoch": 19.12350597609562,
570
+ "grad_norm": 12.99516487121582,
571
+ "learning_rate": 3.625498007968128e-05,
572
+ "loss": 0.1321,
573
+ "step": 4800
574
+ },
575
+ {
576
+ "epoch": 19.52191235059761,
577
+ "grad_norm": 11.697456359863281,
578
+ "learning_rate": 3.492695883134131e-05,
579
+ "loss": 0.1508,
580
+ "step": 4900
581
+ },
582
+ {
583
+ "epoch": 19.9203187250996,
584
+ "grad_norm": 11.452008247375488,
585
+ "learning_rate": 3.359893758300133e-05,
586
+ "loss": 0.1258,
587
+ "step": 5000
588
+ },
589
+ {
590
+ "epoch": 20.0,
591
+ "eval_accuracy": 0.71,
592
+ "eval_f1": 0.7074786456937486,
593
+ "eval_loss": 1.050653338432312,
594
+ "eval_precision": 0.7215994120996662,
595
+ "eval_recall": 0.7083547965174904,
596
+ "eval_runtime": 29.4041,
597
+ "eval_samples_per_second": 17.004,
598
+ "eval_steps_per_second": 2.143,
599
+ "step": 5020
600
+ },
601
+ {
602
+ "epoch": 20.318725099601593,
603
+ "grad_norm": 12.027978897094727,
604
+ "learning_rate": 3.2270916334661356e-05,
605
+ "loss": 0.1387,
606
+ "step": 5100
607
+ },
608
+ {
609
+ "epoch": 20.717131474103585,
610
+ "grad_norm": 13.427599906921387,
611
+ "learning_rate": 3.094289508632138e-05,
612
+ "loss": 0.1116,
613
+ "step": 5200
614
+ },
615
+ {
616
+ "epoch": 21.0,
617
+ "eval_accuracy": 0.724,
618
+ "eval_f1": 0.7153409174915838,
619
+ "eval_loss": 1.0689764022827148,
620
+ "eval_precision": 0.7231692880094706,
621
+ "eval_recall": 0.722209642011374,
622
+ "eval_runtime": 17.4789,
623
+ "eval_samples_per_second": 28.606,
624
+ "eval_steps_per_second": 3.604,
625
+ "step": 5271
626
+ },
627
+ {
628
+ "epoch": 21.115537848605577,
629
+ "grad_norm": 12.410263061523438,
630
+ "learning_rate": 2.961487383798141e-05,
631
+ "loss": 0.1378,
632
+ "step": 5300
633
+ },
634
+ {
635
+ "epoch": 21.51394422310757,
636
+ "grad_norm": 12.236252784729004,
637
+ "learning_rate": 2.8286852589641438e-05,
638
+ "loss": 0.1181,
639
+ "step": 5400
640
+ },
641
+ {
642
+ "epoch": 21.91235059760956,
643
+ "grad_norm": 11.362260818481445,
644
+ "learning_rate": 2.6958831341301462e-05,
645
+ "loss": 0.1158,
646
+ "step": 5500
647
+ },
648
+ {
649
+ "epoch": 22.0,
650
+ "eval_accuracy": 0.702,
651
+ "eval_f1": 0.6967439243675191,
652
+ "eval_loss": 1.1378962993621826,
653
+ "eval_precision": 0.7034445997704206,
654
+ "eval_recall": 0.7004485661440597,
655
+ "eval_runtime": 17.752,
656
+ "eval_samples_per_second": 28.166,
657
+ "eval_steps_per_second": 3.549,
658
+ "step": 5522
659
+ },
660
+ {
661
+ "epoch": 22.310756972111555,
662
+ "grad_norm": 12.567873001098633,
663
+ "learning_rate": 2.563081009296149e-05,
664
+ "loss": 0.1089,
665
+ "step": 5600
666
+ },
667
+ {
668
+ "epoch": 22.709163346613547,
669
+ "grad_norm": 10.872307777404785,
670
+ "learning_rate": 2.4302788844621517e-05,
671
+ "loss": 0.1069,
672
+ "step": 5700
673
+ },
674
+ {
675
+ "epoch": 23.0,
676
+ "eval_accuracy": 0.722,
677
+ "eval_f1": 0.7172758119553166,
678
+ "eval_loss": 1.157360553741455,
679
+ "eval_precision": 0.727205590108816,
680
+ "eval_recall": 0.7205813209797584,
681
+ "eval_runtime": 17.8753,
682
+ "eval_samples_per_second": 27.972,
683
+ "eval_steps_per_second": 3.524,
684
+ "step": 5773
685
+ },
686
+ {
687
+ "epoch": 23.10756972111554,
688
+ "grad_norm": 10.918773651123047,
689
+ "learning_rate": 2.297476759628154e-05,
690
+ "loss": 0.1112,
691
+ "step": 5800
692
+ },
693
+ {
694
+ "epoch": 23.50597609561753,
695
+ "grad_norm": 11.303016662597656,
696
+ "learning_rate": 2.1646746347941568e-05,
697
+ "loss": 0.0954,
698
+ "step": 5900
699
+ },
700
+ {
701
+ "epoch": 23.904382470119522,
702
+ "grad_norm": 10.82700252532959,
703
+ "learning_rate": 2.0318725099601595e-05,
704
+ "loss": 0.1089,
705
+ "step": 6000
706
+ },
707
+ {
708
+ "epoch": 24.0,
709
+ "eval_accuracy": 0.712,
710
+ "eval_f1": 0.7075426800060708,
711
+ "eval_loss": 1.1160012483596802,
712
+ "eval_precision": 0.7194793034050283,
713
+ "eval_recall": 0.7103697429603638,
714
+ "eval_runtime": 17.7441,
715
+ "eval_samples_per_second": 28.178,
716
+ "eval_steps_per_second": 3.55,
717
+ "step": 6024
718
+ },
719
+ {
720
+ "epoch": 24.302788844621514,
721
+ "grad_norm": 10.5631742477417,
722
+ "learning_rate": 1.899070385126162e-05,
723
+ "loss": 0.0864,
724
+ "step": 6100
725
+ },
726
+ {
727
+ "epoch": 24.701195219123505,
728
+ "grad_norm": 10.918201446533203,
729
+ "learning_rate": 1.7662682602921647e-05,
730
+ "loss": 0.0999,
731
+ "step": 6200
732
+ },
733
+ {
734
+ "epoch": 25.0,
735
+ "eval_accuracy": 0.716,
736
+ "eval_f1": 0.7090935362771184,
737
+ "eval_loss": 1.0727450847625732,
738
+ "eval_precision": 0.7106284520077623,
739
+ "eval_recall": 0.7145099767794276,
740
+ "eval_runtime": 17.8845,
741
+ "eval_samples_per_second": 27.957,
742
+ "eval_steps_per_second": 3.523,
743
+ "step": 6275
744
+ },
745
+ {
746
+ "epoch": 25.099601593625497,
747
+ "grad_norm": 11.402228355407715,
748
+ "learning_rate": 1.6334661354581674e-05,
749
+ "loss": 0.1042,
750
+ "step": 6300
751
+ },
752
+ {
753
+ "epoch": 25.49800796812749,
754
+ "grad_norm": 11.740915298461914,
755
+ "learning_rate": 1.5006640106241702e-05,
756
+ "loss": 0.089,
757
+ "step": 6400
758
+ },
759
+ {
760
+ "epoch": 25.89641434262948,
761
+ "grad_norm": 11.218791961669922,
762
+ "learning_rate": 1.3678618857901726e-05,
763
+ "loss": 0.0738,
764
+ "step": 6500
765
+ },
766
+ {
767
+ "epoch": 26.0,
768
+ "eval_accuracy": 0.706,
769
+ "eval_f1": 0.699550751079995,
770
+ "eval_loss": 1.2584666013717651,
771
+ "eval_precision": 0.7133200179296525,
772
+ "eval_recall": 0.704105950343699,
773
+ "eval_runtime": 18.0765,
774
+ "eval_samples_per_second": 27.66,
775
+ "eval_steps_per_second": 3.485,
776
+ "step": 6526
777
+ },
778
+ {
779
+ "epoch": 26.294820717131476,
780
+ "grad_norm": 9.9302339553833,
781
+ "learning_rate": 1.2350597609561753e-05,
782
+ "loss": 0.0914,
783
+ "step": 6600
784
+ },
785
+ {
786
+ "epoch": 26.693227091633467,
787
+ "grad_norm": 11.051177024841309,
788
+ "learning_rate": 1.102257636122178e-05,
789
+ "loss": 0.0836,
790
+ "step": 6700
791
+ },
792
+ {
793
+ "epoch": 27.0,
794
+ "eval_accuracy": 0.718,
795
+ "eval_f1": 0.7104355302219595,
796
+ "eval_loss": 1.1709084510803223,
797
+ "eval_precision": 0.7172922964310544,
798
+ "eval_recall": 0.7163028008735083,
799
+ "eval_runtime": 17.5336,
800
+ "eval_samples_per_second": 28.517,
801
+ "eval_steps_per_second": 3.593,
802
+ "step": 6777
803
+ },
804
+ {
805
+ "epoch": 27.09163346613546,
806
+ "grad_norm": 10.727697372436523,
807
+ "learning_rate": 9.694555112881806e-06,
808
+ "loss": 0.0986,
809
+ "step": 6800
810
+ },
811
+ {
812
+ "epoch": 27.49003984063745,
813
+ "grad_norm": 11.817888259887695,
814
+ "learning_rate": 8.366533864541832e-06,
815
+ "loss": 0.07,
816
+ "step": 6900
817
+ },
818
+ {
819
+ "epoch": 27.888446215139442,
820
+ "grad_norm": 10.357769966125488,
821
+ "learning_rate": 7.03851261620186e-06,
822
+ "loss": 0.0775,
823
+ "step": 7000
824
+ },
825
+ {
826
+ "epoch": 28.0,
827
+ "eval_accuracy": 0.722,
828
+ "eval_f1": 0.7145836341124611,
829
+ "eval_loss": 1.2422434091567993,
830
+ "eval_precision": 0.7256025662918439,
831
+ "eval_recall": 0.720296105512437,
832
+ "eval_runtime": 17.5709,
833
+ "eval_samples_per_second": 28.456,
834
+ "eval_steps_per_second": 3.585,
835
+ "step": 7028
836
+ },
837
+ {
838
+ "epoch": 28.286852589641434,
839
+ "grad_norm": 10.4796142578125,
840
+ "learning_rate": 5.710491367861886e-06,
841
+ "loss": 0.0713,
842
+ "step": 7100
843
+ },
844
+ {
845
+ "epoch": 28.685258964143426,
846
+ "grad_norm": 9.803996086120605,
847
+ "learning_rate": 4.382470119521913e-06,
848
+ "loss": 0.0752,
849
+ "step": 7200
850
+ },
851
+ {
852
+ "epoch": 29.0,
853
+ "eval_accuracy": 0.728,
854
+ "eval_f1": 0.7205734767025089,
855
+ "eval_loss": 1.2145317792892456,
856
+ "eval_precision": 0.730059540405073,
857
+ "eval_recall": 0.7263936664880468,
858
+ "eval_runtime": 17.625,
859
+ "eval_samples_per_second": 28.369,
860
+ "eval_steps_per_second": 3.574,
861
+ "step": 7279
862
+ }
863
+ ],
864
+ "logging_steps": 100,
865
+ "max_steps": 7530,
866
+ "num_input_tokens_seen": 0,
867
+ "num_train_epochs": 30,
868
+ "save_steps": 500,
869
+ "stateful_callbacks": {
870
+ "TrainerControl": {
871
+ "args": {
872
+ "should_epoch_stop": false,
873
+ "should_evaluate": false,
874
+ "should_log": false,
875
+ "should_save": true,
876
+ "should_training_stop": false
877
+ },
878
+ "attributes": {}
879
+ }
880
+ },
881
+ "total_flos": 8.995893225012062e+18,
882
+ "train_batch_size": 16,
883
+ "trial_name": null,
884
+ "trial_params": null
885
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3578ac3aa4d3823d4733a342727e9029e0b82da9c9013e117e3a548971b8a425
3
+ size 5240