trollek committed on
Commit
177b371
1 Parent(s): 20a59ca

Update tokenizer_config.json

Browse files

Updated the chat template, which was not generating properly, and switched `use_fast` from true to false.

Files changed (1) hide show
  1. tokenizer_config.json +4 -5
tokenizer_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": true,
4
- "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
@@ -111,20 +110,20 @@
111
  "<|endofthought|>"
112
  ],
113
  "bos_token": "<s>",
114
- "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '\\n<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '\\n<|im_end|>\\n' }}{% elif message['role'] == 'assistant' %}{{ '<|im_start|>assistant\\n' + content + '\\n<|im_end|>' + '</s>' + '\\n' }}{% endif %}{% endfor %}",
115
- "clean_up_tokenization_spaces": false,
116
  "cls_token": "</s>",
117
  "eos_token": "</s>",
118
  "legacy": true,
119
  "model_max_length": 1000000000000000019884624838656,
120
  "pad_token": "<unk>",
121
  "padding_side": "right",
122
- "sep_token": "</s>",
123
  "sp_model_kwargs": {},
124
  "spaces_between_special_tokens": false,
125
  "split_special_tokens": false,
126
  "tokenizer_class": "LlamaTokenizer",
127
  "unk_token": "<unk>",
128
  "use_default_system_prompt": false,
129
- "use_fast": true
130
  }
 
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": true,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
110
  "<|endofthought|>"
111
  ],
112
  "bos_token": "<s>",
113
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '\\n<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '\\n<|im_end|>\\n' + '<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '\\n<|im_end|>\\n' }}{% endif %}{% endfor %}",
114
+ "clean_up_tokenization_spaces": true,
115
  "cls_token": "</s>",
116
  "eos_token": "</s>",
117
  "legacy": true,
118
  "model_max_length": 1000000000000000019884624838656,
119
  "pad_token": "<unk>",
120
  "padding_side": "right",
121
+ "sep_token": "\n",
122
  "sp_model_kwargs": {},
123
  "spaces_between_special_tokens": false,
124
  "split_special_tokens": false,
125
  "tokenizer_class": "LlamaTokenizer",
126
  "unk_token": "<unk>",
127
  "use_default_system_prompt": false,
128
+ "use_fast": false
129
  }