kanelindsay2000 committed
Commit 68d4880 · verified · 1 parent: 9a06153

Delete roberta-loc-classifier

roberta-loc-classifier/config.json DELETED
@@ -1,28 +0,0 @@
- {
-   "_name_or_path": "roberta-base",
-   "architectures": [
-     "RobertaForSequenceClassification"
-   ],
-   "attention_probs_dropout_prob": 0.1,
-   "bos_token_id": 0,
-   "classifier_dropout": null,
-   "eos_token_id": 2,
-   "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.1,
-   "hidden_size": 768,
-   "initializer_range": 0.02,
-   "intermediate_size": 3072,
-   "layer_norm_eps": 1e-05,
-   "max_position_embeddings": 514,
-   "model_type": "roberta",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 1,
-   "position_embedding_type": "absolute",
-   "problem_type": "single_label_classification",
-   "torch_dtype": "float32",
-   "transformers_version": "4.43.1",
-   "type_vocab_size": 1,
-   "use_cache": true,
-   "vocab_size": 50265
- }
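
The deleted config describes roberta-base fine-tuned as a single-label sequence classifier (no id2label is stored, so transformers falls back to its default two generic labels). For reference, the snippet below is a minimal sketch of loading such a checkpoint, assuming the deleted files were kept in a local roberta-loc-classifier/ directory; the path and the example sentence are hypothetical.

```python
# Minimal sketch: load the classifier from a local copy of the deleted files.
# `local_dir` and the example sentence are assumptions, not taken from the repo.
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

local_dir = "./roberta-loc-classifier"  # hypothetical local path

config = AutoConfig.from_pretrained(local_dir)
print(config.model_type, config.num_labels)  # "roberta", 2 (default, no id2label stored)

tokenizer = AutoTokenizer.from_pretrained(local_dir)
model = AutoModelForSequenceClassification.from_pretrained(local_dir)

inputs = tokenizer("The meeting was held in Paris.", return_tensors="pt",
                   truncation=True, max_length=512)
predicted = model(**inputs).logits.argmax(dim=-1).item()
print(predicted)  # 0 or 1; label meanings are not recorded in the config
```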
 
roberta-loc-classifier/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
roberta-loc-classifier/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c094612cea9dcaf5905c0da642fcaf038dc423743738e31f22e0c94afa2581ca
- size 498612824
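
This deleted entry (like optimizer.pt, rng_state.pth, scheduler.pt and training_args.bin below) is a Git LFS pointer file, not the weights themselves: only the SHA-256 digest and byte size (~499 MB for model.safetensors) were stored in the Git history. A minimal sketch of checking a downloaded blob against such a pointer, with hypothetical local file names:

```python
# Sketch: verify a downloaded blob against a Git LFS pointer file.
# Pointer files contain only "oid sha256:<digest>" and "size <bytes>" lines.
import hashlib

def read_pointer(pointer_path: str) -> tuple[str, int]:
    """Parse the SHA-256 digest and expected size out of an LFS pointer file."""
    oid, size = "", 0
    with open(pointer_path) as f:
        for line in f:
            if line.startswith("oid sha256:"):
                oid = line.split("sha256:", 1)[1].strip()
            elif line.startswith("size "):
                size = int(line.split()[1])
    return oid, size

def verify_blob(blob_path: str, oid: str, size: int) -> bool:
    """Return True if the blob's digest and byte count match the pointer."""
    digest, total = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        while chunk := f.read(1 << 20):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == oid and total == size

# Hypothetical usage:
# oid, size = read_pointer("model.safetensors.pointer")
# print(verify_blob("model.safetensors", oid, size))
```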
 
roberta-loc-classifier/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e3bac9d1e000bf018f003b16b3d68528d793d5e9edc5d44f9706910afcffc234
- size 997345530
 
roberta-loc-classifier/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:71e46d01902a6e5e86b53d78c5c1fb888a4d7a73e0132f7b3c5608010601324c
- size 14244
 
roberta-loc-classifier/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4271b529ffd68ad25597aaede6f8cfc879e800716a6412047ca47388f094caff
- size 1064
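
optimizer.pt, scheduler.pt and rng_state.pth are the auxiliary state a transformers Trainer writes into each checkpoint so that an interrupted run can resume exactly: optimizer moments, the learning-rate scheduler position, and RNG snapshots. A minimal sketch of how they are consumed, with hypothetical paths and arguments that are not taken from this repo:

```python
# Sketch: resuming a Trainer run from a saved checkpoint directory.
# Output dir, checkpoint path and num_labels are assumptions for illustration;
# train/eval datasets are omitted for brevity.
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
args = TrainingArguments(output_dir="runs/example", save_steps=1500)

trainer = Trainer(model=model, args=args)
# Resuming restores optimizer.pt, scheduler.pt and rng_state.pth along with the weights:
# trainer.train(resume_from_checkpoint="runs/example/checkpoint-12000")
```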
 
roberta-loc-classifier/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
- {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "cls_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "mask_token": {
-     "content": "<mask>",
-     "lstrip": true,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<pad>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "sep_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "unk_token": {
-     "content": "<unk>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
- }
 
roberta-loc-classifier/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
- {
-   "add_prefix_space": false,
-   "added_tokens_decoder": {
-     "0": {
-       "content": "<s>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "1": {
-       "content": "<pad>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "2": {
-       "content": "</s>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "3": {
-       "content": "<unk>",
-       "lstrip": false,
-       "normalized": true,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "50264": {
-       "content": "<mask>",
-       "lstrip": true,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "bos_token": "<s>",
-   "clean_up_tokenization_spaces": true,
-   "cls_token": "<s>",
-   "eos_token": "</s>",
-   "errors": "replace",
-   "mask_token": "<mask>",
-   "model_max_length": 512,
-   "pad_token": "<pad>",
-   "sep_token": "</s>",
-   "tokenizer_class": "RobertaTokenizer",
-   "unk_token": "<unk>"
- }
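
Together with vocab.json and merges.txt (too large to render above), special_tokens_map.json and tokenizer_config.json describe the stock roberta-base byte-level BPE tokenizer with model_max_length 512, so an equivalent tokenizer can presumably be rebuilt from the upstream model. A short sketch; the example sentence is illustrative:

```python
# Sketch: rebuild an equivalent tokenizer from upstream roberta-base.
# The special-token IDs printed below match the deleted config (bos=0, pad=1,
# eos=2, mask=50264); the input text is just an example.
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base", model_max_length=512)
print(tokenizer.bos_token_id, tokenizer.pad_token_id, tokenizer.eos_token_id)  # 0 1 2
print(tokenizer.mask_token_id)  # 50264

encoded = tokenizer("Paris is a location.", truncation=True, max_length=512)
print(encoded["input_ids"])
```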
 
roberta-loc-classifier/trainer_state.json DELETED
@@ -1,375 +0,0 @@
- {
-   "best_metric": 0.05933361500501633,
-   "best_model_checkpoint": "runs/roberta-base-500000-samples-512-max-len-64-train-batch-size-8-test-batch-size-3-epochs-1e-05-lr-0.1-warmup-ratio/checkpoint-12000",
-   "epoch": 2.1331058020477816,
-   "eval_steps": 1500,
-   "global_step": 15000,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0,
-       "eval_accuracy": 0.4971,
-       "eval_f1": 0.6639222657346396,
-       "eval_loss": 0.6927798390388489,
-       "eval_precision": 0.4973866025833584,
-       "eval_recall": 0.9981112361356695,
-       "eval_runtime": 372.4207,
-       "eval_samples_per_second": 134.257,
-       "eval_steps_per_second": 16.782,
-       "step": 0
-     },
-     {
-       "epoch": 0.07110352673492605,
-       "grad_norm": 11.443495750427246,
-       "learning_rate": 2.369668246445498e-06,
-       "loss": 0.4665,
-       "step": 500
-     },
-     {
-       "epoch": 0.1422070534698521,
-       "grad_norm": 5.395324230194092,
-       "learning_rate": 4.739336492890996e-06,
-       "loss": 0.1226,
-       "step": 1000
-     },
-     {
-       "epoch": 0.21331058020477817,
-       "grad_norm": 3.713897705078125,
-       "learning_rate": 7.1090047393364935e-06,
-       "loss": 0.1013,
-       "step": 1500
-     },
-     {
-       "epoch": 0.21331058020477817,
-       "eval_accuracy": 0.96946,
-       "eval_f1": 0.9694067677759302,
-       "eval_loss": 0.08184666186571121,
-       "eval_precision": 0.9665987454552719,
-       "eval_recall": 0.9722311525478219,
-       "eval_runtime": 369.1947,
-       "eval_samples_per_second": 135.43,
-       "eval_steps_per_second": 16.929,
-       "step": 1500
-     },
-     {
-       "epoch": 0.2844141069397042,
-       "grad_norm": 22.85436248779297,
-       "learning_rate": 9.478672985781992e-06,
-       "loss": 0.0911,
-       "step": 2000
-     },
-     {
-       "epoch": 0.35551763367463024,
-       "grad_norm": 2.183819532394409,
-       "learning_rate": 9.794585484040872e-06,
-       "loss": 0.0904,
-       "step": 2500
-     },
-     {
-       "epoch": 0.42662116040955633,
-       "grad_norm": 3.5085792541503906,
-       "learning_rate": 9.53123354050353e-06,
-       "loss": 0.0853,
-       "step": 3000
-     },
-     {
-       "epoch": 0.42662116040955633,
-       "eval_accuracy": 0.97452,
-       "eval_f1": 0.97444332998997,
-       "eval_loss": 0.06914982199668884,
-       "eval_precision": 0.9728430665705359,
-       "eval_recall": 0.9760488667416815,
-       "eval_runtime": 369.1227,
-       "eval_samples_per_second": 135.456,
-       "eval_steps_per_second": 16.932,
-       "step": 3000
-     },
-     {
-       "epoch": 0.49772468714448237,
-       "grad_norm": 6.5320234298706055,
-       "learning_rate": 9.267881596966186e-06,
-       "loss": 0.0799,
-       "step": 3500
-     },
-     {
-       "epoch": 0.5688282138794084,
-       "grad_norm": 1.3349543809890747,
-       "learning_rate": 9.004529653428843e-06,
-       "loss": 0.0751,
-       "step": 4000
-     },
-     {
-       "epoch": 0.6399317406143344,
-       "grad_norm": 1.4865925312042236,
-       "learning_rate": 8.7411777098915e-06,
-       "loss": 0.0742,
-       "step": 4500
-     },
-     {
-       "epoch": 0.6399317406143344,
-       "eval_accuracy": 0.97572,
-       "eval_f1": 0.9756136756257282,
-       "eval_loss": 0.06528624147176743,
-       "eval_precision": 0.9753393846895333,
-       "eval_recall": 0.9758881208808873,
-       "eval_runtime": 369.1379,
-       "eval_samples_per_second": 135.451,
-       "eval_steps_per_second": 16.931,
-       "step": 4500
-     },
-     {
-       "epoch": 0.7110352673492605,
-       "grad_norm": 1.9182387590408325,
-       "learning_rate": 8.477825766354156e-06,
-       "loss": 0.0761,
-       "step": 5000
-     },
-     {
-       "epoch": 0.7821387940841866,
-       "grad_norm": 4.340898036956787,
-       "learning_rate": 8.214473822816812e-06,
-       "loss": 0.0742,
-       "step": 5500
-     },
-     {
-       "epoch": 0.8532423208191127,
-       "grad_norm": 4.503939151763916,
-       "learning_rate": 7.95112187927947e-06,
-       "loss": 0.0722,
-       "step": 6000
-     },
-     {
-       "epoch": 0.8532423208191127,
-       "eval_accuracy": 0.97488,
-       "eval_f1": 0.9744964262508122,
-       "eval_loss": 0.06521258503198624,
-       "eval_precision": 0.9848957478246594,
-       "eval_recall": 0.9643144189037133,
-       "eval_runtime": 369.1106,
-       "eval_samples_per_second": 135.461,
-       "eval_steps_per_second": 16.933,
-       "step": 6000
-     },
-     {
-       "epoch": 0.9243458475540387,
-       "grad_norm": 5.767714977264404,
-       "learning_rate": 7.687769935742126e-06,
-       "loss": 0.0664,
-       "step": 6500
-     },
-     {
-       "epoch": 0.9954493742889647,
-       "grad_norm": 1.0236719846725464,
-       "learning_rate": 7.424417992204783e-06,
-       "loss": 0.0687,
-       "step": 7000
-     },
-     {
-       "epoch": 1.0665529010238908,
-       "grad_norm": 5.575131416320801,
-       "learning_rate": 7.1610660486674395e-06,
-       "loss": 0.0594,
-       "step": 7500
-     },
-     {
-       "epoch": 1.0665529010238908,
-       "eval_accuracy": 0.9734,
-       "eval_f1": 0.9729013854930725,
-       "eval_loss": 0.07639238238334656,
-       "eval_precision": 0.9867333443544387,
-       "eval_recall": 0.9594518566146921,
-       "eval_runtime": 369.1258,
-       "eval_samples_per_second": 135.455,
-       "eval_steps_per_second": 16.932,
-       "step": 7500
-     },
-     {
-       "epoch": 1.1376564277588168,
-       "grad_norm": 1.4480912685394287,
-       "learning_rate": 6.8977141051300965e-06,
-       "loss": 0.0613,
-       "step": 8000
-     },
-     {
-       "epoch": 1.2087599544937428,
-       "grad_norm": 14.01652717590332,
-       "learning_rate": 6.6343621615927535e-06,
-       "loss": 0.0626,
-       "step": 8500
-     },
-     {
-       "epoch": 1.2798634812286689,
-       "grad_norm": 2.645029067993164,
-       "learning_rate": 6.3710102180554104e-06,
-       "loss": 0.0595,
-       "step": 9000
-     },
-     {
-       "epoch": 1.2798634812286689,
-       "eval_accuracy": 0.97674,
-       "eval_f1": 0.9764427069618586,
-       "eval_loss": 0.0677267462015152,
-       "eval_precision": 0.9843986113947315,
-       "eval_recall": 0.968614370679955,
-       "eval_runtime": 369.1238,
-       "eval_samples_per_second": 135.456,
-       "eval_steps_per_second": 16.932,
-       "step": 9000
-     },
-     {
-       "epoch": 1.350967007963595,
-       "grad_norm": 4.5592122077941895,
-       "learning_rate": 6.1076582745180666e-06,
-       "loss": 0.0618,
-       "step": 9500
-     },
-     {
-       "epoch": 1.4220705346985212,
-       "grad_norm": 5.417106628417969,
-       "learning_rate": 5.8443063309807235e-06,
-       "loss": 0.058,
-       "step": 10000
-     },
-     {
-       "epoch": 1.493174061433447,
-       "grad_norm": 1.136661171913147,
-       "learning_rate": 5.5809543874433805e-06,
-       "loss": 0.0542,
-       "step": 10500
-     },
-     {
-       "epoch": 1.493174061433447,
-       "eval_accuracy": 0.97848,
-       "eval_f1": 0.9783422567529487,
-       "eval_loss": 0.06500901281833649,
-       "eval_precision": 0.9800387127994193,
-       "eval_recall": 0.9766516637196592,
-       "eval_runtime": 369.2146,
-       "eval_samples_per_second": 135.423,
-       "eval_steps_per_second": 16.928,
-       "step": 10500
-     },
-     {
-       "epoch": 1.5642775881683733,
-       "grad_norm": 2.5331344604492188,
-       "learning_rate": 5.317602443906037e-06,
-       "loss": 0.0623,
-       "step": 11000
-     },
-     {
-       "epoch": 1.635381114903299,
-       "grad_norm": 2.5099124908447266,
-       "learning_rate": 5.054250500368693e-06,
-       "loss": 0.0617,
-       "step": 11500
-     },
-     {
-       "epoch": 1.7064846416382253,
-       "grad_norm": 0.18802767992019653,
-       "learning_rate": 4.79089855683135e-06,
-       "loss": 0.0571,
-       "step": 12000
-     },
-     {
-       "epoch": 1.7064846416382253,
-       "eval_accuracy": 0.97944,
-       "eval_f1": 0.9793847511330366,
-       "eval_loss": 0.05933361500501633,
-       "eval_precision": 0.9774637739172204,
-       "eval_recall": 0.9813132936826877,
-       "eval_runtime": 369.1545,
-       "eval_samples_per_second": 135.445,
-       "eval_steps_per_second": 16.931,
-       "step": 12000
-     },
-     {
-       "epoch": 1.7775881683731511,
-       "grad_norm": 0.17306402325630188,
-       "learning_rate": 4.527546613294007e-06,
-       "loss": 0.0575,
-       "step": 12500
-     },
-     {
-       "epoch": 1.8486916951080774,
-       "grad_norm": 2.0170910358428955,
-       "learning_rate": 4.264194669756664e-06,
-       "loss": 0.0573,
-       "step": 13000
-     },
-     {
-       "epoch": 1.9197952218430034,
-       "grad_norm": 1.0754927396774292,
-       "learning_rate": 4.00084272621932e-06,
-       "loss": 0.0562,
-       "step": 13500
-     },
-     {
-       "epoch": 1.9197952218430034,
-       "eval_accuracy": 0.9793,
-       "eval_f1": 0.9792272955343703,
-       "eval_loss": 0.05992409214377403,
-       "eval_precision": 0.9781083356721864,
-       "eval_recall": 0.9803488185179232,
-       "eval_runtime": 369.2584,
-       "eval_samples_per_second": 135.407,
-       "eval_steps_per_second": 16.926,
-       "step": 13500
-     },
-     {
-       "epoch": 1.9908987485779295,
-       "grad_norm": 0.5176452398300171,
-       "learning_rate": 3.7374907826819767e-06,
-       "loss": 0.0553,
-       "step": 14000
-     },
-     {
-       "epoch": 2.0620022753128557,
-       "grad_norm": 3.9174857139587402,
-       "learning_rate": 3.474138839144633e-06,
-       "loss": 0.0506,
-       "step": 14500
-     },
-     {
-       "epoch": 2.1331058020477816,
-       "grad_norm": 2.6643998622894287,
-       "learning_rate": 3.21078689560729e-06,
-       "loss": 0.0463,
-       "step": 15000
-     },
-     {
-       "epoch": 2.1331058020477816,
-       "eval_accuracy": 0.97976,
-       "eval_f1": 0.9796746334605343,
-       "eval_loss": 0.05948900803923607,
-       "eval_precision": 0.9792419497309885,
-       "eval_recall": 0.9801076997267321,
-       "eval_runtime": 369.2026,
-       "eval_samples_per_second": 135.427,
-       "eval_steps_per_second": 16.928,
-       "step": 15000
-     }
-   ],
-   "logging_steps": 500,
-   "max_steps": 21096,
-   "num_input_tokens_seen": 0,
-   "num_train_epochs": 3,
-   "save_steps": 1500,
-   "stateful_callbacks": {
-     "TrainerControl": {
-       "args": {
-         "should_epoch_stop": false,
-         "should_evaluate": false,
-         "should_log": false,
-         "should_save": true,
-         "should_training_stop": false
-       },
-       "attributes": {}
-     }
-   },
-   "total_flos": 2.5256135448428544e+17,
-   "train_batch_size": 64,
-   "trial_name": null,
-   "trial_params": null
- }
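
The deleted trainer_state.json records the run itself: the best eval_loss (about 0.0593) was reached at step 12000, which is why best_model_checkpoint points at checkpoint-12000, and logging stops at step 15000 of a planned 21096 (roughly epoch 2.13 of 3). A minimal sketch of summarising such a file from a local copy; the path is hypothetical:

```python
# Sketch: pull the best checkpoint and the evaluation curve out of trainer_state.json.
import json

with open("checkpoint-15000/trainer_state.json") as f:  # hypothetical local path
    state = json.load(f)

print("best metric (eval_loss):", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Keep only the evaluation entries from the mixed train/eval log history.
evals = [e for e in state["log_history"] if "eval_loss" in e]
for e in evals:
    print(f'step {e["step"]:>6}: eval_loss={e["eval_loss"]:.4f}  eval_f1={e["eval_f1"]:.4f}')
```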
 
roberta-loc-classifier/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:ff399c0ee8d946a58ab10a5caf56fcb453cd55ab1150693829b072740be35bca
- size 5496
 
roberta-loc-classifier/vocab.json DELETED
The diff for this file is too large to render. See raw diff