hogru committed on
Commit
58d863b
·
1 Parent(s): 81179a6

Initial commit

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "perplexity": 2.8322171542330707,
4
+ "test_accuracy": 0.35489852978435493,
5
+ "test_loss": 1.0410598516464233,
6
+ "test_runtime": 55.5978,
7
+ "test_samples_per_second": 4293.445,
8
+ "test_steps_per_second": 16.781,
9
+ "train_loss": 1.2392261905236766,
10
+ "train_runtime": 25350.8633,
11
+ "train_samples": 1273104,
12
+ "train_samples_per_second": 2510.968,
13
+ "train_steps_per_second": 9.808
14
+ }
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 0,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 1,
10
+ "initializer_range": 0.02,
11
+ "layer_norm_epsilon": 1e-05,
12
+ "model_type": "gpt2",
13
+ "n_embd": 144,
14
+ "n_head": 12,
15
+ "n_inner": null,
16
+ "n_layer": 12,
17
+ "n_positions": 128,
18
+ "pad_token": " ",
19
+ "reorder_and_upcast_attn": false,
20
+ "resid_pdrop": 0.1,
21
+ "scale_attn_by_inverse_layer_idx": false,
22
+ "scale_attn_weights": true,
23
+ "summary_activation": null,
24
+ "summary_first_dropout": 0.1,
25
+ "summary_proj_to_labels": true,
26
+ "summary_type": "cls_index",
27
+ "summary_use_proj": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.27.1",
30
+ "use_cache": true,
31
+ "vocab_size": 93
32
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": 1,
4
+ "length_penalty": 0.0,
5
+ "max_new_tokens": 128,
6
+ "min_new_tokens": 1,
7
+ "num_return_sequences": 100,
8
+ "pad_token_id": 2,
9
+ "transformers_version": "4.27.1"
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75711e4f676270ff40fcfc23ea74221564ba4dafd829420db85f2ef42cdc11dc
3
+ size 12414333
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "^",
3
+ "eos_token": "_",
4
+ "pad_token": " ",
5
+ "unk_token": "§"
6
+ }
test_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "perplexity": 2.8322171542330707,
4
+ "test_accuracy": 0.35489852978435493,
5
+ "test_loss": 1.0410598516464233,
6
+ "test_runtime": 55.5978,
7
+ "test_samples_per_second": 4293.445,
8
+ "test_steps_per_second": 16.781
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "^",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "_",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": " ",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "§",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "°",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": null,
54
+ "post_processor": {
55
+ "type": "TemplateProcessing",
56
+ "single": [
57
+ {
58
+ "SpecialToken": {
59
+ "id": "^",
60
+ "type_id": 0
61
+ }
62
+ },
63
+ {
64
+ "Sequence": {
65
+ "id": "A",
66
+ "type_id": 0
67
+ }
68
+ },
69
+ {
70
+ "SpecialToken": {
71
+ "id": "_",
72
+ "type_id": 0
73
+ }
74
+ }
75
+ ],
76
+ "pair": [
77
+ {
78
+ "Sequence": {
79
+ "id": "A",
80
+ "type_id": 0
81
+ }
82
+ },
83
+ {
84
+ "Sequence": {
85
+ "id": "B",
86
+ "type_id": 1
87
+ }
88
+ }
89
+ ],
90
+ "special_tokens": {
91
+ "^": {
92
+ "id": "^",
93
+ "ids": [
94
+ 0
95
+ ],
96
+ "tokens": [
97
+ "^"
98
+ ]
99
+ },
100
+ "_": {
101
+ "id": "_",
102
+ "ids": [
103
+ 1
104
+ ],
105
+ "tokens": [
106
+ "_"
107
+ ]
108
+ }
109
+ }
110
+ },
111
+ "decoder": null,
112
+ "model": {
113
+ "type": "Unigram",
114
+ "unk_id": 3,
115
+ "vocab": [
116
+ [
117
+ "^",
118
+ 0.0
119
+ ],
120
+ [
121
+ "_",
122
+ 0.0
123
+ ],
124
+ [
125
+ " ",
126
+ 0.0
127
+ ],
128
+ [
129
+ "§",
130
+ 0.0
131
+ ],
132
+ [
133
+ "°",
134
+ 0.0
135
+ ],
136
+ [
137
+ "C",
138
+ -2.2339120063759843
139
+ ],
140
+ [
141
+ "c1",
142
+ -3.108178186508052
143
+ ],
144
+ [
145
+ "cc",
146
+ -3.2719370147830347
147
+ ],
148
+ [
149
+ "N",
150
+ -3.2932537094066436
151
+ ],
152
+ [
153
+ "O",
154
+ -3.294834188186176
155
+ ],
156
+ [
157
+ "n",
158
+ -3.3053461817854917
159
+ ],
160
+ [
161
+ ")",
162
+ -3.3475212288619822
163
+ ],
164
+ [
165
+ "CC",
166
+ -3.4312206506012544
167
+ ],
168
+ [
169
+ "(",
170
+ -3.5624560302888746
171
+ ],
172
+ [
173
+ "c(",
174
+ -3.765813889966134
175
+ ],
176
+ [
177
+ "c",
178
+ -3.8218193366886393
179
+ ],
180
+ [
181
+ "C(=O)",
182
+ -3.856958418790814
183
+ ],
184
+ [
185
+ "c3",
186
+ -3.9254062324983607
187
+ ],
188
+ [
189
+ "=",
190
+ -3.975334503903719
191
+ ],
192
+ [
193
+ "c2",
194
+ -4.003423620506245
195
+ ],
196
+ [
197
+ "1",
198
+ -4.112680698145304
199
+ ],
200
+ [
201
+ "(C)",
202
+ -4.121209869936163
203
+ ],
204
+ [
205
+ "2",
206
+ -4.1361761826809555
207
+ ],
208
+ [
209
+ "C1",
210
+ -4.21768981835341
211
+ ],
212
+ [
213
+ "c1ccc(",
214
+ -4.251735247120498
215
+ ],
216
+ [
217
+ "-",
218
+ -4.252411803847931
219
+ ],
220
+ [
221
+ "2)",
222
+ -4.270562896654599
223
+ ],
224
+ [
225
+ "cc(",
226
+ -4.276471688527964
227
+ ],
228
+ [
229
+ "C(",
230
+ -4.3086368671374995
231
+ ],
232
+ [
233
+ "CO",
234
+ -4.337904719405833
235
+ ],
236
+ [
237
+ "C(=O)N",
238
+ -4.428846224698317
239
+ ],
240
+ [
241
+ "3)",
242
+ -4.449247292348714
243
+ ],
244
+ [
245
+ "C2",
246
+ -4.522375726063721
247
+ ],
248
+ [
249
+ "O)",
250
+ -4.5371556203299654
251
+ ],
252
+ [
253
+ "CCC",
254
+ -4.589271525409394
255
+ ],
256
+ [
257
+ "3",
258
+ -4.724794788518322
259
+ ],
260
+ [
261
+ "C(C)",
262
+ -4.750491328820496
263
+ ],
264
+ [
265
+ "cc1",
266
+ -4.774712806549024
267
+ ],
268
+ [
269
+ "cn",
270
+ -4.786244699312933
271
+ ],
272
+ [
273
+ "NC(=O)",
274
+ -4.789019365908235
275
+ ],
276
+ [
277
+ "CC1",
278
+ -4.79366206708262
279
+ ],
280
+ [
281
+ "=O)",
282
+ -4.843480092939821
283
+ ],
284
+ [
285
+ "C)",
286
+ -4.8437048190443175
287
+ ],
288
+ [
289
+ "S",
290
+ -4.850239832224922
291
+ ],
292
+ [
293
+ "n1",
294
+ -4.864510173862389
295
+ ],
296
+ [
297
+ "c1ccccc1",
298
+ -4.92499844194878
299
+ ],
300
+ [
301
+ "c2ccc(",
302
+ -5.024291675089154
303
+ ],
304
+ [
305
+ "[nH]",
306
+ -5.110243493192227
307
+ ],
308
+ [
309
+ "c4",
310
+ -5.110439906920133
311
+ ],
312
+ [
313
+ "s",
314
+ -5.124712107023752
315
+ ],
316
+ [
317
+ "N1",
318
+ -5.13109013422166
319
+ ],
320
+ [
321
+ "o",
322
+ -5.144341645447907
323
+ ],
324
+ [
325
+ "F)",
326
+ -5.173986710079323
327
+ ],
328
+ [
329
+ "N(C",
330
+ -5.185845166641148
331
+ ],
332
+ [
333
+ "S(=O)(=O)",
334
+ -5.212557352968634
335
+ ],
336
+ [
337
+ "Cl)",
338
+ -5.220168182623269
339
+ ],
340
+ [
341
+ "c2ccc",
342
+ -5.2702619038121234
343
+ ],
344
+ [
345
+ "C(O)",
346
+ -5.276647874822254
347
+ ],
348
+ [
349
+ "2)cc1",
350
+ -5.314256517516519
351
+ ],
352
+ [
353
+ "O=C(",
354
+ -5.316263615262265
355
+ ],
356
+ [
357
+ "c3ccccc3",
358
+ -5.35319737432803
359
+ ],
360
+ [
361
+ "4",
362
+ -5.365584577986091
363
+ ],
364
+ [
365
+ "c(Cl)c",
366
+ -5.396247268723645
367
+ ],
368
+ [
369
+ "C=C",
370
+ -5.435516000963592
371
+ ],
372
+ [
373
+ "5",
374
+ -5.521017741806011
375
+ ],
376
+ [
377
+ "N2CC",
378
+ -5.731120719935269
379
+ ],
380
+ [
381
+ "c(F)c",
382
+ -5.761758311176596
383
+ ],
384
+ [
385
+ "C(F)(F)F)",
386
+ -5.816389560959275
387
+ ],
388
+ [
389
+ "[",
390
+ -5.9137028227376565
391
+ ],
392
+ [
393
+ "]",
394
+ -5.9137028227376565
395
+ ],
396
+ [
397
+ "c(OC)c",
398
+ -5.948373907493249
399
+ ],
400
+ [
401
+ "c(-c3cc",
402
+ -6.076591700432848
403
+ ],
404
+ [
405
+ "Br)",
406
+ -6.173257390983636
407
+ ],
408
+ [
409
+ "#",
410
+ -6.406719606169013
411
+ ],
412
+ [
413
+ "[N+](=O)[O-])",
414
+ -6.4134780734073065
415
+ ],
416
+ [
417
+ "+",
418
+ -6.459711891957882
419
+ ],
420
+ [
421
+ "F",
422
+ -6.6661526991253
423
+ ],
424
+ [
425
+ "P",
426
+ -7.117461698495431
427
+ ],
428
+ [
429
+ "6",
430
+ -7.298200155096458
431
+ ],
432
+ [
433
+ "B",
434
+ -8.020755755896921
435
+ ],
436
+ [
437
+ "I",
438
+ -8.076335551264686
439
+ ],
440
+ [
441
+ "7",
442
+ -9.064406674700315
443
+ ],
444
+ [
445
+ "H",
446
+ -9.766986825131063
447
+ ],
448
+ [
449
+ "8",
450
+ -10.67648343715202
451
+ ],
452
+ [
453
+ "9",
454
+ -11.6208264490498
455
+ ],
456
+ [
457
+ "%",
458
+ -13.14094059722758
459
+ ],
460
+ [
461
+ "0",
462
+ -13.512972359677438
463
+ ],
464
+ [
465
+ "p",
466
+ -13.73258273972798
467
+ ],
468
+ [
469
+ "l",
470
+ -17.659453778747075
471
+ ],
472
+ [
473
+ "e",
474
+ -17.659553778747075
475
+ ],
476
+ [
477
+ "i",
478
+ -17.659653778747074
479
+ ],
480
+ [
481
+ "r",
482
+ -17.659753778747074
483
+ ],
484
+ [
485
+ "b",
486
+ -17.659753778747074
487
+ ]
488
+ ]
489
+ }
490
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "^",
3
+ "eos_token": "_",
4
+ "model_max_length": 128,
5
+ "pad_token": " ",
6
+ "padding_side": "right",
7
+ "special_tokens_map_file": "../../tokenizers/char_unigram_88/special_tokens_map.json",
8
+ "tokenizer_class": "PreTrainedTokenizerFast",
9
+ "truncation_side": "left",
10
+ "unk_token": "§"
11
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 1.2392261905236766,
4
+ "train_runtime": 25350.8633,
5
+ "train_samples": 1273104,
6
+ "train_samples_per_second": 2510.968,
7
+ "train_steps_per_second": 9.808
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d7f23e857016ba44427fcedf175d39d5d54474fc70e0d9f748a4969ddc5394f
3
+ size 3643