ng0-k1 committed on
Commit
4da9f17
1 Parent(s): e8c85b1

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
  "<BOS>": 50257,
3
- "<EOS>": 50258,
4
- "<PAD>": 50259
5
  }
 
1
  {
2
  "<BOS>": 50257,
3
+ "<EOS>": 50258
 
4
  }
special_tokens_map.json CHANGED
@@ -13,12 +13,6 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "<PAD>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "unk_token": "<|endoftext|>"
24
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<EOS>",
 
 
 
 
 
 
17
  "unk_token": "<|endoftext|>"
18
  }
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 1024,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -34,15 +29,6 @@
34
  "rstrip": false,
35
  "normalized": false,
36
  "special": true
37
- },
38
- {
39
- "id": 50259,
40
- "content": "<PAD>",
41
- "single_word": false,
42
- "lstrip": false,
43
- "rstrip": false,
44
- "normalized": false,
45
- "special": true
46
  }
47
  ],
48
  "normalizer": null,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -24,21 +24,13 @@
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
27
- },
28
- "50259": {
29
- "content": "<PAD>",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
  }
36
  },
37
  "bos_token": "<BOS>",
38
  "clean_up_tokenization_spaces": true,
39
  "eos_token": "<EOS>",
40
  "model_max_length": 1024,
41
- "pad_token": "<PAD>",
42
  "tokenizer_class": "GPT2Tokenizer",
43
  "unk_token": "<|endoftext|>"
44
  }
 
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
27
  }
28
  },
29
  "bos_token": "<BOS>",
30
  "clean_up_tokenization_spaces": true,
31
  "eos_token": "<EOS>",
32
  "model_max_length": 1024,
33
+ "pad_token": "<EOS>",
34
  "tokenizer_class": "GPT2Tokenizer",
35
  "unk_token": "<|endoftext|>"
36
  }