CrabInHoney committed on
Commit
1b76370
1 Parent(s): d4a2135

Upload 6 files

Browse files
Files changed (6) hide show
  1. config.json +34 -0
  2. model.safetensors +3 -0
  3. special_tokens_map.json +37 -0
  4. tokenizer.json +664 -0
  5. tokenizer_config.json +58 -0
  6. vocab.txt +500 -0
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-tiny-finetuned",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 256,
11
+ "id2label": {
12
+ "0": "good",
13
+ "1": "phish"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 1024,
17
+ "label2id": {
18
+ "good": 0,
19
+ "phish": 1
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 64,
23
+ "model_type": "bert",
24
+ "num_attention_heads": 8,
25
+ "num_hidden_layers": 8,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "single_label_classification",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.44.2",
31
+ "type_vocab_size": 2,
32
+ "use_cache": true,
33
+ "vocab_size": 500
34
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da2e4ba56c8ff971356fbc7d65fb133cfebdf75a23b6630fe7b2a7703cd9644
3
+ size 26134776
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
@@ -0,0 +1,664 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 64,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 64
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
+ "added_tokens": [
20
+ {
21
+ "id": 0,
22
+ "content": "[PAD]",
23
+ "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
+ "normalized": false,
27
+ "special": true
28
+ },
29
+ {
30
+ "id": 1,
31
+ "content": "[UNK]",
32
+ "single_word": false,
33
+ "lstrip": false,
34
+ "rstrip": false,
35
+ "normalized": false,
36
+ "special": true
37
+ },
38
+ {
39
+ "id": 2,
40
+ "content": "[CLS]",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 3,
49
+ "content": "[SEP]",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ },
56
+ {
57
+ "id": 4,
58
+ "content": "[MASK]",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
+ }
65
+ ],
66
+ "normalizer": {
67
+ "type": "BertNormalizer",
68
+ "clean_text": true,
69
+ "handle_chinese_chars": true,
70
+ "strip_accents": null,
71
+ "lowercase": true
72
+ },
73
+ "pre_tokenizer": {
74
+ "type": "BertPreTokenizer"
75
+ },
76
+ "post_processor": {
77
+ "type": "TemplateProcessing",
78
+ "single": [
79
+ {
80
+ "SpecialToken": {
81
+ "id": "[CLS]",
82
+ "type_id": 0
83
+ }
84
+ },
85
+ {
86
+ "Sequence": {
87
+ "id": "A",
88
+ "type_id": 0
89
+ }
90
+ },
91
+ {
92
+ "SpecialToken": {
93
+ "id": "[SEP]",
94
+ "type_id": 0
95
+ }
96
+ }
97
+ ],
98
+ "pair": [
99
+ {
100
+ "SpecialToken": {
101
+ "id": "[CLS]",
102
+ "type_id": 0
103
+ }
104
+ },
105
+ {
106
+ "Sequence": {
107
+ "id": "A",
108
+ "type_id": 0
109
+ }
110
+ },
111
+ {
112
+ "SpecialToken": {
113
+ "id": "[SEP]",
114
+ "type_id": 0
115
+ }
116
+ },
117
+ {
118
+ "Sequence": {
119
+ "id": "B",
120
+ "type_id": 1
121
+ }
122
+ },
123
+ {
124
+ "SpecialToken": {
125
+ "id": "[SEP]",
126
+ "type_id": 1
127
+ }
128
+ }
129
+ ],
130
+ "special_tokens": {
131
+ "[CLS]": {
132
+ "id": "[CLS]",
133
+ "ids": [
134
+ 2
135
+ ],
136
+ "tokens": [
137
+ "[CLS]"
138
+ ]
139
+ },
140
+ "[SEP]": {
141
+ "id": "[SEP]",
142
+ "ids": [
143
+ 3
144
+ ],
145
+ "tokens": [
146
+ "[SEP]"
147
+ ]
148
+ }
149
+ }
150
+ },
151
+ "decoder": {
152
+ "type": "WordPiece",
153
+ "prefix": "##",
154
+ "cleanup": true
155
+ },
156
+ "model": {
157
+ "type": "WordPiece",
158
+ "unk_token": "[UNK]",
159
+ "continuing_subword_prefix": "##",
160
+ "max_input_chars_per_word": 100,
161
+ "vocab": {
162
+ "[PAD]": 0,
163
+ "[UNK]": 1,
164
+ "[CLS]": 2,
165
+ "[SEP]": 3,
166
+ "[MASK]": 4,
167
+ "&": 5,
168
+ "'": 6,
169
+ "*": 7,
170
+ ",": 8,
171
+ "-": 9,
172
+ ".": 10,
173
+ "/": 11,
174
+ "0": 12,
175
+ "1": 13,
176
+ "2": 14,
177
+ "3": 15,
178
+ "4": 16,
179
+ "5": 17,
180
+ "6": 18,
181
+ "7": 19,
182
+ "8": 20,
183
+ "9": 21,
184
+ ":": 22,
185
+ ";": 23,
186
+ "_": 24,
187
+ "a": 25,
188
+ "b": 26,
189
+ "c": 27,
190
+ "d": 28,
191
+ "e": 29,
192
+ "f": 30,
193
+ "g": 31,
194
+ "h": 32,
195
+ "i": 33,
196
+ "j": 34,
197
+ "k": 35,
198
+ "l": 36,
199
+ "m": 37,
200
+ "n": 38,
201
+ "o": 39,
202
+ "p": 40,
203
+ "q": 41,
204
+ "r": 42,
205
+ "s": 43,
206
+ "t": 44,
207
+ "u": 45,
208
+ "v": 46,
209
+ "w": 47,
210
+ "x": 48,
211
+ "y": 49,
212
+ "z": 50,
213
+ "ã": 51,
214
+ "ä": 52,
215
+ "å": 53,
216
+ "æ": 54,
217
+ "ç": 55,
218
+ "é": 56,
219
+ "ë": 57,
220
+ "í": 58,
221
+ "ï": 59,
222
+ "ñ": 60,
223
+ "ó": 61,
224
+ "ô": 62,
225
+ "õ": 63,
226
+ "ö": 64,
227
+ "ø": 65,
228
+ "ú": 66,
229
+ "ü": 67,
230
+ "ń": 68,
231
+ "а": 69,
232
+ "г": 70,
233
+ "д": 71,
234
+ "е": 72,
235
+ "и": 73,
236
+ "н": 74,
237
+ "о": 75,
238
+ "р": 76,
239
+ "с": 77,
240
+ "ф": 78,
241
+ "я": 79,
242
+ "加": 80,
243
+ "大": 81,
244
+ "学": 82,
245
+ "師": 83,
246
+ "祥": 84,
247
+ "講": 85,
248
+ "贝": 86,
249
+ "通": 87,
250
+ "##a": 88,
251
+ "##t": 89,
252
+ "##h": 90,
253
+ "##l": 91,
254
+ "##i": 92,
255
+ "##n": 93,
256
+ "##e": 94,
257
+ "##d": 95,
258
+ "##y": 96,
259
+ "##o": 97,
260
+ "##u": 98,
261
+ "##b": 99,
262
+ "##6": 100,
263
+ "##s": 101,
264
+ "##c": 102,
265
+ "##r": 103,
266
+ "##p": 104,
267
+ "##k": 105,
268
+ "##j": 106,
269
+ "##g": 107,
270
+ "##w": 108,
271
+ "##m": 109,
272
+ "##z": 110,
273
+ "##v": 111,
274
+ "##q": 112,
275
+ "##f": 113,
276
+ "##x": 114,
277
+ "##8": 115,
278
+ "##1": 116,
279
+ "##2": 117,
280
+ "##0": 118,
281
+ "##7": 119,
282
+ "##9": 120,
283
+ "##3": 121,
284
+ "##4": 122,
285
+ "##5": 123,
286
+ "##ø": 124,
287
+ "##æ": 125,
288
+ "##ú": 126,
289
+ "##ä": 127,
290
+ "##ñ": 128,
291
+ "##é": 129,
292
+ "##е": 130,
293
+ "##ç": 131,
294
+ "##ã": 132,
295
+ "##õ": 133,
296
+ "##í": 134,
297
+ "##ф": 135,
298
+ "##ë": 136,
299
+ "##о": 137,
300
+ "##с": 138,
301
+ "##и": 139,
302
+ "##я": 140,
303
+ "##г": 141,
304
+ "##д": 142,
305
+ "##н": 143,
306
+ "##通": 144,
307
+ "##贝": 145,
308
+ "##祥": 146,
309
+ "##ń": 147,
310
+ "##ï": 148,
311
+ "##学": 149,
312
+ "##講": 150,
313
+ "##師": 151,
314
+ "##ö": 152,
315
+ "##ô": 153,
316
+ "##å": 154,
317
+ "##ó": 155,
318
+ "##ü": 156,
319
+ "##р": 157,
320
+ "##а": 158,
321
+ "co": 159,
322
+ "com": 160,
323
+ "##er": 161,
324
+ "##in": 162,
325
+ "##ar": 163,
326
+ "##ou": 164,
327
+ "##es": 165,
328
+ "##or": 166,
329
+ "##nt": 167,
330
+ "##et": 168,
331
+ "##al": 169,
332
+ "##ro": 170,
333
+ "##il": 171,
334
+ "##el": 172,
335
+ "##ic": 173,
336
+ "##ec": 174,
337
+ "##at": 175,
338
+ "##on": 176,
339
+ "##ac": 177,
340
+ "##it": 178,
341
+ "##is": 179,
342
+ "##an": 180,
343
+ "##ew": 181,
344
+ "##rc": 182,
345
+ "pro": 183,
346
+ "##ing": 184,
347
+ "##out": 185,
348
+ "##tp": 186,
349
+ "##ts": 187,
350
+ "##ap": 188,
351
+ "##og": 189,
352
+ "re": 190,
353
+ "##as": 191,
354
+ "##rg": 192,
355
+ "##pp": 193,
356
+ "org": 194,
357
+ "##am": 195,
358
+ "##ile": 196,
359
+ "##op": 197,
360
+ "st": 198,
361
+ "##ti": 199,
362
+ "##em": 200,
363
+ "ma": 201,
364
+ "##ol": 202,
365
+ "##ri": 203,
366
+ "ch": 204,
367
+ "##ort": 205,
368
+ "##ews": 206,
369
+ "##ers": 207,
370
+ "##en": 208,
371
+ "##ms": 209,
372
+ "##ch": 210,
373
+ "ab": 211,
374
+ "##du": 212,
375
+ "about": 213,
376
+ "##me": 214,
377
+ "##art": 215,
378
+ "su": 216,
379
+ "##re": 217,
380
+ "##nc": 218,
381
+ "##net": 219,
382
+ "##ad": 220,
383
+ "##ent": 221,
384
+ "##ourc": 222,
385
+ "tel": 223,
386
+ "##eam": 224,
387
+ "##dap": 225,
388
+ "ws": 226,
389
+ "se": 227,
390
+ "ldap": 228,
391
+ "ht": 229,
392
+ "do": 230,
393
+ "irc": 231,
394
+ "http": 232,
395
+ "ac": 233,
396
+ "##ter": 234,
397
+ "sh": 235,
398
+ "news": 236,
399
+ "bl": 237,
400
+ "##pi": 238,
401
+ "##erv": 239,
402
+ "##ard": 240,
403
+ "##ting": 241,
404
+ "##bo": 242,
405
+ "mail": 243,
406
+ "##ervic": 244,
407
+ "ad": 245,
408
+ "shop": 246,
409
+ "uk": 247,
410
+ "blog": 248,
411
+ "in": 249,
412
+ "api": 250,
413
+ "##ervices": 251,
414
+ "##st": 252,
415
+ "##arch": 253,
416
+ "##eg": 254,
417
+ "##min": 255,
418
+ "##act": 256,
419
+ "##duc": 257,
420
+ "##ash": 258,
421
+ "##ount": 259,
422
+ "##lo": 260,
423
+ "##riv": 261,
424
+ "acc": 262,
425
+ "br": 263,
426
+ "##elp": 264,
427
+ "cont": 265,
428
+ "ter": 266,
429
+ "log": 267,
430
+ "##eck": 268,
431
+ "reg": 269,
432
+ "##acy": 270,
433
+ "set": 271,
434
+ "##tings": 272,
435
+ "##pport": 273,
436
+ "prof": 274,
437
+ "##ducts": 275,
438
+ "##ister": 276,
439
+ "search": 277,
440
+ "##board": 278,
441
+ "settings": 279,
442
+ "priv": 280,
443
+ "terms": 281,
444
+ "services": 282,
445
+ "help": 283,
446
+ "register": 284,
447
+ "support": 285,
448
+ "account": 286,
449
+ "dash": 287,
450
+ "dashboard": 288,
451
+ "privacy": 289,
452
+ "login": 290,
453
+ "##mp": 291,
454
+ "contact": 292,
455
+ "check": 293,
456
+ "admin": 294,
457
+ "profile": 295,
458
+ "checkout": 296,
459
+ "products": 297,
460
+ "##io": 298,
461
+ "##eb": 299,
462
+ "##are": 300,
463
+ "##um": 301,
464
+ "##po": 302,
465
+ "##to": 303,
466
+ "##sh": 304,
467
+ "##all": 305,
468
+ "##vi": 306,
469
+ "nl": 307,
470
+ "##pe": 308,
471
+ "de": 309,
472
+ "##her": 310,
473
+ "##sy": 311,
474
+ "##ies": 312,
475
+ "##ial": 313,
476
+ "##ric": 314,
477
+ "##ob": 315,
478
+ "##rou": 316,
479
+ "sk": 317,
480
+ "sc": 318,
481
+ "vi": 319,
482
+ "au": 320,
483
+ "##cs": 321,
484
+ "##ners": 322,
485
+ "##ata": 323,
486
+ "##dis": 324,
487
+ "##roup": 325,
488
+ "##vent": 326,
489
+ "##ex": 327,
490
+ "##artners": 328,
491
+ "##ion": 329,
492
+ "##ag": 330,
493
+ "sky": 331,
494
+ "##wn": 332,
495
+ "##ource": 333,
496
+ "##cal": 334,
497
+ "##ery": 335,
498
+ "##mon": 336,
499
+ "##ed": 337,
500
+ "rt": 338,
501
+ "##fy": 339,
502
+ "for": 340,
503
+ "sf": 341,
504
+ "web": 342,
505
+ "res": 343,
506
+ "mag": 344,
507
+ "sv": 345,
508
+ "skype": 346,
509
+ "##co": 347,
510
+ "fa": 348,
511
+ "rtmp": 349,
512
+ "spo": 350,
513
+ "##tsp": 351,
514
+ "git": 352,
515
+ "gop": 353,
516
+ "xm": 354,
517
+ "data": 355,
518
+ "cat": 356,
519
+ "##rome": 357,
520
+ "xmpp": 358,
521
+ "##tify": 359,
522
+ "spotify": 360,
523
+ "##ntp": 361,
524
+ "gopher": 362,
525
+ "sftp": 363,
526
+ "##ects": 364,
527
+ "source": 365,
528
+ "ftp": 366,
529
+ "port": 367,
530
+ "##sync": 368,
531
+ "net": 369,
532
+ "rtsp": 370,
533
+ "view": 371,
534
+ "rsync": 372,
535
+ "vnc": 373,
536
+ "https": 374,
537
+ "chrome": 375,
538
+ "##vents": 376,
539
+ "ssh": 377,
540
+ "ldaps": 378,
541
+ "mailto": 379,
542
+ "wss": 380,
543
+ "nntp": 381,
544
+ "ircs": 382,
545
+ "scp": 383,
546
+ "svn": 384,
547
+ "telnet": 385,
548
+ "mms": 386,
549
+ "##ources": 387,
550
+ "magnet": 388,
551
+ "redis": 389,
552
+ "##esti": 390,
553
+ "##fol": 391,
554
+ "steam": 392,
555
+ "webcal": 393,
556
+ "file": 394,
557
+ "team": 395,
558
+ "##ads": 396,
559
+ "events": 397,
560
+ "##ials": 398,
561
+ "care": 399,
562
+ "sit": 400,
563
+ "##folio": 401,
564
+ "##jects": 402,
565
+ "portfolio": 403,
566
+ "partners": 404,
567
+ "##ure": 405,
568
+ "revi": 406,
569
+ "cart": 407,
570
+ "resources": 408,
571
+ "forum": 409,
572
+ "testi": 410,
573
+ "##monials": 411,
574
+ "testimonials": 412,
575
+ "reviews": 413,
576
+ "gall": 414,
577
+ "##emap": 415,
578
+ "##ories": 416,
579
+ "pric": 417,
580
+ "##us": 418,
581
+ "sitemap": 419,
582
+ "projects": 420,
583
+ "##and": 421,
584
+ "docs": 422,
585
+ "gallery": 423,
586
+ "faq": 424,
587
+ "down": 425,
588
+ "pricing": 426,
589
+ "##egories": 427,
590
+ "categories": 428,
591
+ "##loads": 429,
592
+ "downloads": 430,
593
+ "careers": 431,
594
+ "##ig": 432,
595
+ "##ore": 433,
596
+ "##ia": 434,
597
+ "##ur": 435,
598
+ "##sc": 436,
599
+ "##eta": 437,
600
+ "sec": 438,
601
+ "##ww": 439,
602
+ "##emo": 440,
603
+ "cd": 441,
604
+ "mob": 442,
605
+ "app": 443,
606
+ "##atic": 444,
607
+ "www": 445,
608
+ "dev": 446,
609
+ "mobile": 447,
610
+ "secure": 448,
611
+ "cdn": 449,
612
+ "store": 450,
613
+ "demo": 451,
614
+ "beta": 452,
615
+ "##ech": 453,
616
+ "static": 454,
617
+ "ho": 455,
618
+ "##ra": 456,
619
+ "##ons": 457,
620
+ "##le": 458,
621
+ "ca": 459,
622
+ "##ul": 460,
623
+ "##be": 461,
624
+ "it": 462,
625
+ "##ov": 463,
626
+ "##ir": 464,
627
+ "##group": 465,
628
+ "edu": 466,
629
+ "fr": 467,
630
+ "##th": 468,
631
+ "##id": 469,
632
+ "##os": 470,
633
+ "##om": 471,
634
+ "ind": 472,
635
+ "##ab": 473,
636
+ "##un": 474,
637
+ "##ot": 475,
638
+ "##est": 476,
639
+ "##ay": 477,
640
+ "##ation": 478,
641
+ "home": 479,
642
+ "##ru": 480,
643
+ "##anc": 481,
644
+ "th": 482,
645
+ "sub": 483,
646
+ "##ribe": 484,
647
+ "index": 485,
648
+ "##od": 486,
649
+ "##scribe": 487,
650
+ "subscribe": 488,
651
+ "##of": 489,
652
+ "al": 490,
653
+ "##oc": 491,
654
+ "##ity": 492,
655
+ "##iv": 493,
656
+ "##ine": 494,
657
+ "##inc": 495,
658
+ "##im": 496,
659
+ "ar": 497,
660
+ "tr": 498,
661
+ "es": 499
662
+ }
663
+ }
664
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "lowercase": true,
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
vocab.txt ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ &
7
+ '
8
+ *
9
+ ,
10
+ -
11
+ .
12
+ /
13
+ 0
14
+ 1
15
+ 2
16
+ 3
17
+ 4
18
+ 5
19
+ 6
20
+ 7
21
+ 8
22
+ 9
23
+ :
24
+ ;
25
+ _
26
+ a
27
+ b
28
+ c
29
+ d
30
+ e
31
+ f
32
+ g
33
+ h
34
+ i
35
+ j
36
+ k
37
+ l
38
+ m
39
+ n
40
+ o
41
+ p
42
+ q
43
+ r
44
+ s
45
+ t
46
+ u
47
+ v
48
+ w
49
+ x
50
+ y
51
+ z
52
+ ã
53
+ ä
54
+ å
55
+ æ
56
+ ç
57
+ é
58
+ ë
59
+ í
60
+ ï
61
+ ñ
62
+ ó
63
+ ô
64
+ õ
65
+ ö
66
+ ø
67
+ ú
68
+ ü
69
+ ń
70
+ а
71
+ г
72
+ д
73
+ е
74
+ и
75
+ н
76
+ о
77
+ р
78
+ с
79
+ ф
80
+ я
81
+ 加
82
+ 大
83
+ 学
84
+ 師
85
+ 祥
86
+ 講
87
+ 贝
88
+ 通
89
+ ##a
90
+ ##t
91
+ ##h
92
+ ##l
93
+ ##i
94
+ ##n
95
+ ##e
96
+ ##d
97
+ ##y
98
+ ##o
99
+ ##u
100
+ ##b
101
+ ##6
102
+ ##s
103
+ ##c
104
+ ##r
105
+ ##p
106
+ ##k
107
+ ##j
108
+ ##g
109
+ ##w
110
+ ##m
111
+ ##z
112
+ ##v
113
+ ##q
114
+ ##f
115
+ ##x
116
+ ##8
117
+ ##1
118
+ ##2
119
+ ##0
120
+ ##7
121
+ ##9
122
+ ##3
123
+ ##4
124
+ ##5
125
+ ##ø
126
+ ##æ
127
+ ##ú
128
+ ##ä
129
+ ##ñ
130
+ ##é
131
+ ##е
132
+ ##ç
133
+ ##ã
134
+ ##õ
135
+ ##í
136
+ ##ф
137
+ ##ë
138
+ ##о
139
+ ##с
140
+ ##и
141
+ ##я
142
+ ##г
143
+ ##д
144
+ ##н
145
+ ##通
146
+ ##贝
147
+ ##祥
148
+ ##ń
149
+ ##ï
150
+ ##学
151
+ ##講
152
+ ##師
153
+ ##ö
154
+ ##ô
155
+ ##å
156
+ ##ó
157
+ ##ü
158
+ ##р
159
+ ##а
160
+ co
161
+ com
162
+ ##er
163
+ ##in
164
+ ##ar
165
+ ##ou
166
+ ##es
167
+ ##or
168
+ ##nt
169
+ ##et
170
+ ##al
171
+ ##ro
172
+ ##il
173
+ ##el
174
+ ##ic
175
+ ##ec
176
+ ##at
177
+ ##on
178
+ ##ac
179
+ ##it
180
+ ##is
181
+ ##an
182
+ ##ew
183
+ ##rc
184
+ pro
185
+ ##ing
186
+ ##out
187
+ ##tp
188
+ ##ts
189
+ ##ap
190
+ ##og
191
+ re
192
+ ##as
193
+ ##rg
194
+ ##pp
195
+ org
196
+ ##am
197
+ ##ile
198
+ ##op
199
+ st
200
+ ##ti
201
+ ##em
202
+ ma
203
+ ##ol
204
+ ##ri
205
+ ch
206
+ ##ort
207
+ ##ews
208
+ ##ers
209
+ ##en
210
+ ##ms
211
+ ##ch
212
+ ab
213
+ ##du
214
+ about
215
+ ##me
216
+ ##art
217
+ su
218
+ ##re
219
+ ##nc
220
+ ##net
221
+ ##ad
222
+ ##ent
223
+ ##ourc
224
+ tel
225
+ ##eam
226
+ ##dap
227
+ ws
228
+ se
229
+ ldap
230
+ ht
231
+ do
232
+ irc
233
+ http
234
+ ac
235
+ ##ter
236
+ sh
237
+ news
238
+ bl
239
+ ##pi
240
+ ##erv
241
+ ##ard
242
+ ##ting
243
+ ##bo
244
+ mail
245
+ ##ervic
246
+ ad
247
+ shop
248
+ uk
249
+ blog
250
+ in
251
+ api
252
+ ##ervices
253
+ ##st
254
+ ##arch
255
+ ##eg
256
+ ##min
257
+ ##act
258
+ ##duc
259
+ ##ash
260
+ ##ount
261
+ ##lo
262
+ ##riv
263
+ acc
264
+ br
265
+ ##elp
266
+ cont
267
+ ter
268
+ log
269
+ ##eck
270
+ reg
271
+ ##acy
272
+ set
273
+ ##tings
274
+ ##pport
275
+ prof
276
+ ##ducts
277
+ ##ister
278
+ search
279
+ ##board
280
+ settings
281
+ priv
282
+ terms
283
+ services
284
+ help
285
+ register
286
+ support
287
+ account
288
+ dash
289
+ dashboard
290
+ privacy
291
+ login
292
+ ##mp
293
+ contact
294
+ check
295
+ admin
296
+ profile
297
+ checkout
298
+ products
299
+ ##io
300
+ ##eb
301
+ ##are
302
+ ##um
303
+ ##po
304
+ ##to
305
+ ##sh
306
+ ##all
307
+ ##vi
308
+ nl
309
+ ##pe
310
+ de
311
+ ##her
312
+ ##sy
313
+ ##ies
314
+ ##ial
315
+ ##ric
316
+ ##ob
317
+ ##rou
318
+ sk
319
+ sc
320
+ vi
321
+ au
322
+ ##cs
323
+ ##ners
324
+ ##ata
325
+ ##dis
326
+ ##roup
327
+ ##vent
328
+ ##ex
329
+ ##artners
330
+ ##ion
331
+ ##ag
332
+ sky
333
+ ##wn
334
+ ##ource
335
+ ##cal
336
+ ##ery
337
+ ##mon
338
+ ##ed
339
+ rt
340
+ ##fy
341
+ for
342
+ sf
343
+ web
344
+ res
345
+ mag
346
+ sv
347
+ skype
348
+ ##co
349
+ fa
350
+ rtmp
351
+ spo
352
+ ##tsp
353
+ git
354
+ gop
355
+ xm
356
+ data
357
+ cat
358
+ ##rome
359
+ xmpp
360
+ ##tify
361
+ spotify
362
+ ##ntp
363
+ gopher
364
+ sftp
365
+ ##ects
366
+ source
367
+ ftp
368
+ port
369
+ ##sync
370
+ net
371
+ rtsp
372
+ view
373
+ rsync
374
+ vnc
375
+ https
376
+ chrome
377
+ ##vents
378
+ ssh
379
+ ldaps
380
+ mailto
381
+ wss
382
+ nntp
383
+ ircs
384
+ scp
385
+ svn
386
+ telnet
387
+ mms
388
+ ##ources
389
+ magnet
390
+ redis
391
+ ##esti
392
+ ##fol
393
+ steam
394
+ webcal
395
+ file
396
+ team
397
+ ##ads
398
+ events
399
+ ##ials
400
+ care
401
+ sit
402
+ ##folio
403
+ ##jects
404
+ portfolio
405
+ partners
406
+ ##ure
407
+ revi
408
+ cart
409
+ resources
410
+ forum
411
+ testi
412
+ ##monials
413
+ testimonials
414
+ reviews
415
+ gall
416
+ ##emap
417
+ ##ories
418
+ pric
419
+ ##us
420
+ sitemap
421
+ projects
422
+ ##and
423
+ docs
424
+ gallery
425
+ faq
426
+ down
427
+ pricing
428
+ ##egories
429
+ categories
430
+ ##loads
431
+ downloads
432
+ careers
433
+ ##ig
434
+ ##ore
435
+ ##ia
436
+ ##ur
437
+ ##sc
438
+ ##eta
439
+ sec
440
+ ##ww
441
+ ##emo
442
+ cd
443
+ mob
444
+ app
445
+ ##atic
446
+ www
447
+ dev
448
+ mobile
449
+ secure
450
+ cdn
451
+ store
452
+ demo
453
+ beta
454
+ ##ech
455
+ static
456
+ ho
457
+ ##ra
458
+ ##ons
459
+ ##le
460
+ ca
461
+ ##ul
462
+ ##be
463
+ it
464
+ ##ov
465
+ ##ir
466
+ ##group
467
+ edu
468
+ fr
469
+ ##th
470
+ ##id
471
+ ##os
472
+ ##om
473
+ ind
474
+ ##ab
475
+ ##un
476
+ ##ot
477
+ ##est
478
+ ##ay
479
+ ##ation
480
+ home
481
+ ##ru
482
+ ##anc
483
+ th
484
+ sub
485
+ ##ribe
486
+ index
487
+ ##od
488
+ ##scribe
489
+ subscribe
490
+ ##of
491
+ al
492
+ ##oc
493
+ ##ity
494
+ ##iv
495
+ ##ine
496
+ ##inc
497
+ ##im
498
+ ar
499
+ tr
500
+ es