asude55 committed on
Commit 888da38 · verified · 1 Parent(s): 8f7ebd0

Update tokenizer.json

Files changed (1)
  1. tokenizer.json +0 -107
tokenizer.json CHANGED
@@ -1,107 +0,0 @@
- {
-   "version": "0.1",
-   "truncation": {
-     "max_length": 512,
-     "stride": 0,
-     "strategy": "longest_first"
-   },
-   "padding": {
-     "strategy": "longest",
-     "max_length": 512,
-     "pad_to_multiple_of": null
-   },
-   "added_tokens": [
-     {
-       "id": 0,
-       "content": "[PAD]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false,
-       "special": true
-     },
-     {
-       "id": 1,
-       "content": "[UNK]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false,
-       "special": true
-     },
-     {
-       "id": 2,
-       "content": "[CLS]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false,
-       "special": true
-     },
-     {
-       "id": 3,
-       "content": "[SEP]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false,
-       "special": true
-     },
-     {
-       "id": 4,
-       "content": "[MASK]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false,
-       "special": true
-     }
-   ],
-   "normalizer": {
-     "type": "BertNormalizer",
-     "clean_text": true,
-     "handle_chinese_chars": true,
-     "strip_accents": null,
-     "lowercase": false
-   },
-   "pre_tokenizer": {
-     "type": "BertPreTokenizer"
-   },
-   "post_processor": {
-     "type": "BertPostProcessor",
-     "sep": {
-       "type": "AddedToken",
-       "content": "[SEP]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false
-     },
-     "cls": {
-       "type": "AddedToken",
-       "content": "[CLS]",
-       "single_word": false,
-       "lstrip": false,
-       "rstrip": false,
-       "normalized": false
-     }
-   },
-   "decoder": {
-     "type": "WordPiece",
-     "cleanup": true
-   },
-   "model": {
-     "type": "WordPiece",
-     "unk_token": "[UNK]",
-     "vocab": {
-       "[PAD]": 0,
-       "[UNK]": 1,
-       "[CLS]": 2,
-       "[SEP]": 3,
-       "[MASK]": 4,
-       "hello": 5,
-       "world": 6
-     },
-     "max_input_chars_per_word": 100
-   }
- }
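
For context on the format (illustrative only, not part of this commit): the removed file follows the Hugging Face tokenizers serialization layout, describing a WordPiece model with a BertNormalizer, BertPreTokenizer, a BERT-style post-processor, and the usual special tokens. A minimal sketch of how such a tokenizer.json is normally loaded and used, assuming the file is a valid serialization in the working directory:

# Sketch: load a tokenizer.json in the `tokenizers` serialization format
# and run it on a sample string. The exact tokens/ids depend on the vocab
# and post-processor defined in the file.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")   # parse the JSON serialization
enc = tok.encode("hello world")               # WordPiece encoding with BERT-style pre-tokenization
print(enc.tokens)                             # e.g. ['[CLS]', 'hello', 'world', '[SEP]']
print(enc.ids)                                # e.g. [2, 5, 6, 3] with the vocab shown above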