Abhaykoul committed on
Commit
292d752
1 Parent(s): 5e95fe4

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +21 -21
tokenizer_config.json CHANGED
@@ -152,30 +152,31 @@
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
155
- },
156
- "49152": {
157
- "content": "<|im_start|>",
158
- "lstrip": false,
159
- "normalized": false,
160
- "rstrip": false,
161
- "single_word": false,
162
- "special": true
163
- },
164
- "49153": {
165
- "content": "<|im_end|>",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false,
170
- "special": true
171
  }
172
  },
173
  "additional_special_tokens": [
174
- "<|im_start|>",
175
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  ],
177
  "bos_token": "<|endoftext|>",
178
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
179
  "clean_up_tokenization_spaces": true,
180
  "eos_token": "<|endoftext|>",
181
  "model_max_length": 9223372036854775807,
@@ -184,5 +185,4 @@
184
  "tokenizer_class": "GPT2Tokenizer",
185
  "unk_token": "<|endoftext|>",
186
  "vocab_size": 49152
187
- }
188
-
 
152
  "rstrip": false,
153
  "single_word": false,
154
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  },
157
  "additional_special_tokens": [
158
+ "<|endoftext|>",
159
+ "<fim_prefix>",
160
+ "<fim_middle>",
161
+ "<fim_suffix>",
162
+ "<fim_pad>",
163
+ "<filename>",
164
+ "<gh_stars>",
165
+ "<issue_start>",
166
+ "<issue_comment>",
167
+ "<issue_closed>",
168
+ "<jupyter_start>",
169
+ "<jupyter_text>",
170
+ "<jupyter_code>",
171
+ "<jupyter_output>",
172
+ "<empty_output>",
173
+ "<commit_before>",
174
+ "<commit_msg>",
175
+ "<commit_after>",
176
+ "<reponame>"
177
  ],
178
  "bos_token": "<|endoftext|>",
179
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'Question:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'system' %}\n{{ 'System:\n' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Answer:\n' + message['content'] + '\n\n' }}{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Answer:\n' }}{% endif %}{% endfor %}",
180
  "clean_up_tokenization_spaces": true,
181
  "eos_token": "<|endoftext|>",
182
  "model_max_length": 9223372036854775807,
 
185
  "tokenizer_class": "GPT2Tokenizer",
186
  "unk_token": "<|endoftext|>",
187
  "vocab_size": 49152
188
+ }