datht committed on
Commit
e8ae0cd
·
verified ·
1 Parent(s): 06ca69e

Upload tokenizer

Browse files
Files changed (5) hide show
  1. added_tokens.json +5 -5
  2. merges.txt +0 -0
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +5 -5
  5. vocab.json +0 -0
added_tokens.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "[CLS]": 20114,
3
- "[MASK]": 20116,
4
- "[PAD]": 20113,
5
- "[SEP]": 20115,
6
- "[UNK]": 20112
7
  }
 
1
  {
2
+ "[CLS]": 50002,
3
+ "[MASK]": 50004,
4
+ "[PAD]": 50001,
5
+ "[SEP]": 50003,
6
+ "[UNK]": 50000
7
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -9,7 +9,7 @@
9
  "single_word": false,
10
  "special": true
11
  },
12
- "20112": {
13
  "content": "[UNK]",
14
  "lstrip": false,
15
  "normalized": false,
@@ -17,7 +17,7 @@
17
  "single_word": false,
18
  "special": true
19
  },
20
- "20113": {
21
  "content": "[PAD]",
22
  "lstrip": false,
23
  "normalized": false,
@@ -25,7 +25,7 @@
25
  "single_word": false,
26
  "special": true
27
  },
28
- "20114": {
29
  "content": "[CLS]",
30
  "lstrip": false,
31
  "normalized": false,
@@ -33,7 +33,7 @@
33
  "single_word": false,
34
  "special": true
35
  },
36
- "20115": {
37
  "content": "[SEP]",
38
  "lstrip": false,
39
  "normalized": false,
@@ -41,7 +41,7 @@
41
  "single_word": false,
42
  "special": true
43
  },
44
- "20116": {
45
  "content": "[MASK]",
46
  "lstrip": false,
47
  "normalized": false,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "50000": {
13
  "content": "[UNK]",
14
  "lstrip": false,
15
  "normalized": false,
 
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "50001": {
21
  "content": "[PAD]",
22
  "lstrip": false,
23
  "normalized": false,
 
25
  "single_word": false,
26
  "special": true
27
  },
28
+ "50002": {
29
  "content": "[CLS]",
30
  "lstrip": false,
31
  "normalized": false,
 
33
  "single_word": false,
34
  "special": true
35
  },
36
+ "50003": {
37
  "content": "[SEP]",
38
  "lstrip": false,
39
  "normalized": false,
 
41
  "single_word": false,
42
  "special": true
43
  },
44
+ "50004": {
45
  "content": "[MASK]",
46
  "lstrip": false,
47
  "normalized": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff