bleugreen committed on
Commit
6e49112
1 Parent(s): 3579614

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 2,
3
+ "<s>": 1,
4
+ "<unk>": 0,
5
+ "<|im_end|>": 32000,
6
+ "<|im_start|>": 32001
7
+ }
special_tokens_map.json CHANGED
@@ -2,10 +2,11 @@
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
- "</s>"
 
6
  ],
7
  "bos_token": "<s>",
8
- "eos_token": "</s>",
9
- "pad_token": "</s>",
10
  "unk_token": "<unk>"
11
  }
 
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
+ "</s>",
6
+ "<|im_end|>"
7
  ],
8
  "bos_token": "<s>",
9
+ "eos_token": "<|im_end|>",
10
+ "pad_token": "<|im_end|>",
11
  "unk_token": "<unk>"
12
  }
tokenizer.json CHANGED
@@ -27,8 +27,26 @@
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
 
 
 
 
 
 
 
 
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
+ "normalized": true,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "<|im_end|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
  "normalized": false,
40
  "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
+ "content": "<|im_start|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": false
50
  }
51
  ],
52
  "normalizer": {
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json CHANGED
@@ -19,27 +19,46 @@
19
  "2": {
20
  "content": "</s>",
21
  "lstrip": false,
 
 
 
 
 
 
 
 
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
26
  }
27
  },
28
  "additional_special_tokens": [
29
  "<unk>",
30
  "<s>",
31
- "</s>"
 
32
  ],
33
  "bos_token": "<s>",
34
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
35
  "clean_up_tokenization_spaces": false,
36
- "eos_token": "</s>",
37
  "legacy": true,
38
  "model_max_length": 1000000000000000019884624838656,
39
- "pad_token": "</s>",
40
  "sp_model_kwargs": {},
41
  "spaces_between_special_tokens": false,
42
  "tokenizer_class": "LlamaTokenizer",
 
43
  "unk_token": "<unk>",
44
- "use_default_system_prompt": true
 
45
  }
 
19
  "2": {
20
  "content": "</s>",
21
  "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<|im_end|>",
29
+ "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<|im_start|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
  }
43
  },
44
  "additional_special_tokens": [
45
  "<unk>",
46
  "<s>",
47
+ "</s>",
48
+ "<|im_end|>"
49
  ],
50
  "bos_token": "<s>",
51
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
52
  "clean_up_tokenization_spaces": false,
53
+ "eos_token": "<|im_end|>",
54
  "legacy": true,
55
  "model_max_length": 1000000000000000019884624838656,
56
+ "pad_token": "<|im_end|>",
57
  "sp_model_kwargs": {},
58
  "spaces_between_special_tokens": false,
59
  "tokenizer_class": "LlamaTokenizer",
60
+ "trust_remote_code": false,
61
  "unk_token": "<unk>",
62
+ "use_default_system_prompt": true,
63
+ "use_fast": true
64
  }