kanelindsay2000 committed on
Commit
b2268b8
·
verified ·
1 Parent(s): 68d4880

Upload model files to root directory

Browse files
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.43.1",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 50265
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c094612cea9dcaf5905c0da642fcaf038dc423743738e31f22e0c94afa2581ca
3
+ size 498612824
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3bac9d1e000bf018f003b16b3d68528d793d5e9edc5d44f9706910afcffc234
3
+ size 997345530
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e46d01902a6e5e86b53d78c5c1fb888a4d7a73e0132f7b3c5608010601324c
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4271b529ffd68ad25597aaede6f8cfc879e800716a6412047ca47388f094caff
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 512,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "RobertaTokenizer",
55
+ "unk_token": "<unk>"
56
+ }
trainer_state.json ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.05933361500501633,
3
+ "best_model_checkpoint": "runs/roberta-base-500000-samples-512-max-len-64-train-batch-size-8-test-batch-size-3-epochs-1e-05-lr-0.1-warmup-ratio/checkpoint-12000",
4
+ "epoch": 2.1331058020477816,
5
+ "eval_steps": 1500,
6
+ "global_step": 15000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0,
13
+ "eval_accuracy": 0.4971,
14
+ "eval_f1": 0.6639222657346396,
15
+ "eval_loss": 0.6927798390388489,
16
+ "eval_precision": 0.4973866025833584,
17
+ "eval_recall": 0.9981112361356695,
18
+ "eval_runtime": 372.4207,
19
+ "eval_samples_per_second": 134.257,
20
+ "eval_steps_per_second": 16.782,
21
+ "step": 0
22
+ },
23
+ {
24
+ "epoch": 0.07110352673492605,
25
+ "grad_norm": 11.443495750427246,
26
+ "learning_rate": 2.369668246445498e-06,
27
+ "loss": 0.4665,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 0.1422070534698521,
32
+ "grad_norm": 5.395324230194092,
33
+ "learning_rate": 4.739336492890996e-06,
34
+ "loss": 0.1226,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 0.21331058020477817,
39
+ "grad_norm": 3.713897705078125,
40
+ "learning_rate": 7.1090047393364935e-06,
41
+ "loss": 0.1013,
42
+ "step": 1500
43
+ },
44
+ {
45
+ "epoch": 0.21331058020477817,
46
+ "eval_accuracy": 0.96946,
47
+ "eval_f1": 0.9694067677759302,
48
+ "eval_loss": 0.08184666186571121,
49
+ "eval_precision": 0.9665987454552719,
50
+ "eval_recall": 0.9722311525478219,
51
+ "eval_runtime": 369.1947,
52
+ "eval_samples_per_second": 135.43,
53
+ "eval_steps_per_second": 16.929,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 0.2844141069397042,
58
+ "grad_norm": 22.85436248779297,
59
+ "learning_rate": 9.478672985781992e-06,
60
+ "loss": 0.0911,
61
+ "step": 2000
62
+ },
63
+ {
64
+ "epoch": 0.35551763367463024,
65
+ "grad_norm": 2.183819532394409,
66
+ "learning_rate": 9.794585484040872e-06,
67
+ "loss": 0.0904,
68
+ "step": 2500
69
+ },
70
+ {
71
+ "epoch": 0.42662116040955633,
72
+ "grad_norm": 3.5085792541503906,
73
+ "learning_rate": 9.53123354050353e-06,
74
+ "loss": 0.0853,
75
+ "step": 3000
76
+ },
77
+ {
78
+ "epoch": 0.42662116040955633,
79
+ "eval_accuracy": 0.97452,
80
+ "eval_f1": 0.97444332998997,
81
+ "eval_loss": 0.06914982199668884,
82
+ "eval_precision": 0.9728430665705359,
83
+ "eval_recall": 0.9760488667416815,
84
+ "eval_runtime": 369.1227,
85
+ "eval_samples_per_second": 135.456,
86
+ "eval_steps_per_second": 16.932,
87
+ "step": 3000
88
+ },
89
+ {
90
+ "epoch": 0.49772468714448237,
91
+ "grad_norm": 6.5320234298706055,
92
+ "learning_rate": 9.267881596966186e-06,
93
+ "loss": 0.0799,
94
+ "step": 3500
95
+ },
96
+ {
97
+ "epoch": 0.5688282138794084,
98
+ "grad_norm": 1.3349543809890747,
99
+ "learning_rate": 9.004529653428843e-06,
100
+ "loss": 0.0751,
101
+ "step": 4000
102
+ },
103
+ {
104
+ "epoch": 0.6399317406143344,
105
+ "grad_norm": 1.4865925312042236,
106
+ "learning_rate": 8.7411777098915e-06,
107
+ "loss": 0.0742,
108
+ "step": 4500
109
+ },
110
+ {
111
+ "epoch": 0.6399317406143344,
112
+ "eval_accuracy": 0.97572,
113
+ "eval_f1": 0.9756136756257282,
114
+ "eval_loss": 0.06528624147176743,
115
+ "eval_precision": 0.9753393846895333,
116
+ "eval_recall": 0.9758881208808873,
117
+ "eval_runtime": 369.1379,
118
+ "eval_samples_per_second": 135.451,
119
+ "eval_steps_per_second": 16.931,
120
+ "step": 4500
121
+ },
122
+ {
123
+ "epoch": 0.7110352673492605,
124
+ "grad_norm": 1.9182387590408325,
125
+ "learning_rate": 8.477825766354156e-06,
126
+ "loss": 0.0761,
127
+ "step": 5000
128
+ },
129
+ {
130
+ "epoch": 0.7821387940841866,
131
+ "grad_norm": 4.340898036956787,
132
+ "learning_rate": 8.214473822816812e-06,
133
+ "loss": 0.0742,
134
+ "step": 5500
135
+ },
136
+ {
137
+ "epoch": 0.8532423208191127,
138
+ "grad_norm": 4.503939151763916,
139
+ "learning_rate": 7.95112187927947e-06,
140
+ "loss": 0.0722,
141
+ "step": 6000
142
+ },
143
+ {
144
+ "epoch": 0.8532423208191127,
145
+ "eval_accuracy": 0.97488,
146
+ "eval_f1": 0.9744964262508122,
147
+ "eval_loss": 0.06521258503198624,
148
+ "eval_precision": 0.9848957478246594,
149
+ "eval_recall": 0.9643144189037133,
150
+ "eval_runtime": 369.1106,
151
+ "eval_samples_per_second": 135.461,
152
+ "eval_steps_per_second": 16.933,
153
+ "step": 6000
154
+ },
155
+ {
156
+ "epoch": 0.9243458475540387,
157
+ "grad_norm": 5.767714977264404,
158
+ "learning_rate": 7.687769935742126e-06,
159
+ "loss": 0.0664,
160
+ "step": 6500
161
+ },
162
+ {
163
+ "epoch": 0.9954493742889647,
164
+ "grad_norm": 1.0236719846725464,
165
+ "learning_rate": 7.424417992204783e-06,
166
+ "loss": 0.0687,
167
+ "step": 7000
168
+ },
169
+ {
170
+ "epoch": 1.0665529010238908,
171
+ "grad_norm": 5.575131416320801,
172
+ "learning_rate": 7.1610660486674395e-06,
173
+ "loss": 0.0594,
174
+ "step": 7500
175
+ },
176
+ {
177
+ "epoch": 1.0665529010238908,
178
+ "eval_accuracy": 0.9734,
179
+ "eval_f1": 0.9729013854930725,
180
+ "eval_loss": 0.07639238238334656,
181
+ "eval_precision": 0.9867333443544387,
182
+ "eval_recall": 0.9594518566146921,
183
+ "eval_runtime": 369.1258,
184
+ "eval_samples_per_second": 135.455,
185
+ "eval_steps_per_second": 16.932,
186
+ "step": 7500
187
+ },
188
+ {
189
+ "epoch": 1.1376564277588168,
190
+ "grad_norm": 1.4480912685394287,
191
+ "learning_rate": 6.8977141051300965e-06,
192
+ "loss": 0.0613,
193
+ "step": 8000
194
+ },
195
+ {
196
+ "epoch": 1.2087599544937428,
197
+ "grad_norm": 14.01652717590332,
198
+ "learning_rate": 6.6343621615927535e-06,
199
+ "loss": 0.0626,
200
+ "step": 8500
201
+ },
202
+ {
203
+ "epoch": 1.2798634812286689,
204
+ "grad_norm": 2.645029067993164,
205
+ "learning_rate": 6.3710102180554104e-06,
206
+ "loss": 0.0595,
207
+ "step": 9000
208
+ },
209
+ {
210
+ "epoch": 1.2798634812286689,
211
+ "eval_accuracy": 0.97674,
212
+ "eval_f1": 0.9764427069618586,
213
+ "eval_loss": 0.0677267462015152,
214
+ "eval_precision": 0.9843986113947315,
215
+ "eval_recall": 0.968614370679955,
216
+ "eval_runtime": 369.1238,
217
+ "eval_samples_per_second": 135.456,
218
+ "eval_steps_per_second": 16.932,
219
+ "step": 9000
220
+ },
221
+ {
222
+ "epoch": 1.350967007963595,
223
+ "grad_norm": 4.5592122077941895,
224
+ "learning_rate": 6.1076582745180666e-06,
225
+ "loss": 0.0618,
226
+ "step": 9500
227
+ },
228
+ {
229
+ "epoch": 1.4220705346985212,
230
+ "grad_norm": 5.417106628417969,
231
+ "learning_rate": 5.8443063309807235e-06,
232
+ "loss": 0.058,
233
+ "step": 10000
234
+ },
235
+ {
236
+ "epoch": 1.493174061433447,
237
+ "grad_norm": 1.136661171913147,
238
+ "learning_rate": 5.5809543874433805e-06,
239
+ "loss": 0.0542,
240
+ "step": 10500
241
+ },
242
+ {
243
+ "epoch": 1.493174061433447,
244
+ "eval_accuracy": 0.97848,
245
+ "eval_f1": 0.9783422567529487,
246
+ "eval_loss": 0.06500901281833649,
247
+ "eval_precision": 0.9800387127994193,
248
+ "eval_recall": 0.9766516637196592,
249
+ "eval_runtime": 369.2146,
250
+ "eval_samples_per_second": 135.423,
251
+ "eval_steps_per_second": 16.928,
252
+ "step": 10500
253
+ },
254
+ {
255
+ "epoch": 1.5642775881683733,
256
+ "grad_norm": 2.5331344604492188,
257
+ "learning_rate": 5.317602443906037e-06,
258
+ "loss": 0.0623,
259
+ "step": 11000
260
+ },
261
+ {
262
+ "epoch": 1.635381114903299,
263
+ "grad_norm": 2.5099124908447266,
264
+ "learning_rate": 5.054250500368693e-06,
265
+ "loss": 0.0617,
266
+ "step": 11500
267
+ },
268
+ {
269
+ "epoch": 1.7064846416382253,
270
+ "grad_norm": 0.18802767992019653,
271
+ "learning_rate": 4.79089855683135e-06,
272
+ "loss": 0.0571,
273
+ "step": 12000
274
+ },
275
+ {
276
+ "epoch": 1.7064846416382253,
277
+ "eval_accuracy": 0.97944,
278
+ "eval_f1": 0.9793847511330366,
279
+ "eval_loss": 0.05933361500501633,
280
+ "eval_precision": 0.9774637739172204,
281
+ "eval_recall": 0.9813132936826877,
282
+ "eval_runtime": 369.1545,
283
+ "eval_samples_per_second": 135.445,
284
+ "eval_steps_per_second": 16.931,
285
+ "step": 12000
286
+ },
287
+ {
288
+ "epoch": 1.7775881683731511,
289
+ "grad_norm": 0.17306402325630188,
290
+ "learning_rate": 4.527546613294007e-06,
291
+ "loss": 0.0575,
292
+ "step": 12500
293
+ },
294
+ {
295
+ "epoch": 1.8486916951080774,
296
+ "grad_norm": 2.0170910358428955,
297
+ "learning_rate": 4.264194669756664e-06,
298
+ "loss": 0.0573,
299
+ "step": 13000
300
+ },
301
+ {
302
+ "epoch": 1.9197952218430034,
303
+ "grad_norm": 1.0754927396774292,
304
+ "learning_rate": 4.00084272621932e-06,
305
+ "loss": 0.0562,
306
+ "step": 13500
307
+ },
308
+ {
309
+ "epoch": 1.9197952218430034,
310
+ "eval_accuracy": 0.9793,
311
+ "eval_f1": 0.9792272955343703,
312
+ "eval_loss": 0.05992409214377403,
313
+ "eval_precision": 0.9781083356721864,
314
+ "eval_recall": 0.9803488185179232,
315
+ "eval_runtime": 369.2584,
316
+ "eval_samples_per_second": 135.407,
317
+ "eval_steps_per_second": 16.926,
318
+ "step": 13500
319
+ },
320
+ {
321
+ "epoch": 1.9908987485779295,
322
+ "grad_norm": 0.5176452398300171,
323
+ "learning_rate": 3.7374907826819767e-06,
324
+ "loss": 0.0553,
325
+ "step": 14000
326
+ },
327
+ {
328
+ "epoch": 2.0620022753128557,
329
+ "grad_norm": 3.9174857139587402,
330
+ "learning_rate": 3.474138839144633e-06,
331
+ "loss": 0.0506,
332
+ "step": 14500
333
+ },
334
+ {
335
+ "epoch": 2.1331058020477816,
336
+ "grad_norm": 2.6643998622894287,
337
+ "learning_rate": 3.21078689560729e-06,
338
+ "loss": 0.0463,
339
+ "step": 15000
340
+ },
341
+ {
342
+ "epoch": 2.1331058020477816,
343
+ "eval_accuracy": 0.97976,
344
+ "eval_f1": 0.9796746334605343,
345
+ "eval_loss": 0.05948900803923607,
346
+ "eval_precision": 0.9792419497309885,
347
+ "eval_recall": 0.9801076997267321,
348
+ "eval_runtime": 369.2026,
349
+ "eval_samples_per_second": 135.427,
350
+ "eval_steps_per_second": 16.928,
351
+ "step": 15000
352
+ }
353
+ ],
354
+ "logging_steps": 500,
355
+ "max_steps": 21096,
356
+ "num_input_tokens_seen": 0,
357
+ "num_train_epochs": 3,
358
+ "save_steps": 1500,
359
+ "stateful_callbacks": {
360
+ "TrainerControl": {
361
+ "args": {
362
+ "should_epoch_stop": false,
363
+ "should_evaluate": false,
364
+ "should_log": false,
365
+ "should_save": true,
366
+ "should_training_stop": false
367
+ },
368
+ "attributes": {}
369
+ }
370
+ },
371
+ "total_flos": 2.5256135448428544e+17,
372
+ "train_batch_size": 64,
373
+ "trial_name": null,
374
+ "trial_params": null
375
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff399c0ee8d946a58ab10a5caf56fcb453cd55ab1150693829b072740be35bca
3
+ size 5496
vocab.json ADDED
The diff for this file is too large to render. See raw diff