webpolis committed on
Commit 59e7889 · verified · 1 Parent(s): 0d113d9
Files changed (1)
  1. tokenizer_config.json +13 -41
tokenizer_config.json CHANGED
@@ -1,32 +1,15 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
+  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
+    "50256": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32000": {
+    "50257": {
       "content": "<|vulgarg|>",
       "lstrip": false,
       "normalized": true,
@@ -34,7 +17,7 @@
       "single_word": false,
       "special": false
     },
-    "32001": {
+    "50258": {
       "content": "[INST]",
       "lstrip": false,
       "normalized": true,
@@ -42,7 +25,7 @@
       "single_word": false,
       "special": false
     },
-    "32002": {
+    "50259": {
       "content": "[/INST]",
       "lstrip": false,
       "normalized": true,
@@ -52,23 +35,12 @@
     }
   },
   "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
+    "<|endoftext|>"
   ],
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": true,
-  "max_length": 512,
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "stride": 0,
-  "tokenizer_class": "LlamaTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": true
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
 }
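
A minimal sanity-check sketch for the updated config, assuming a placeholder repository id ("webpolis/model" is hypothetical; substitute the actual Hub repo this commit belongs to). After this change, loading the tokenizer should yield a GPT-2-class tokenizer with a 2048-token limit, <|endoftext|> as bos/eos/unk, and the added tokens remapped into the GPT-2 id range.

# Minimal sketch; the repo id below is a placeholder, not the actual repository.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("webpolis/model")

# The commit switches tokenizer_class to GPT2Tokenizer and sets model_max_length to 2048,
# with <|endoftext|> serving as bos, eos, and unk.
print(type(tokenizer).__name__)
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.unk_token)
print(tokenizer.model_max_length)  # 2048

# The added tokens should resolve to their new ids from added_tokens_decoder.
for token in ("<|endoftext|>", "<|vulgarg|>", "[INST]", "[/INST]"):
    print(token, tokenizer.convert_tokens_to_ids(token))  # 50256, 50257, 50258, 50259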