t3ga committed on
Commit
0cbb5cd
·
1 Parent(s): 34b39a9
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "CohereForAI/c4ai-command-r-v01",
3
+ "architectures": [
4
+ "CohereForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 5,
9
+ "eos_token_id": 255001,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 8192,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 22528,
14
+ "layer_norm_eps": 1e-05,
15
+ "logit_scale": 0.0625,
16
+ "max_position_embeddings": 8192,
17
+ "model_max_length": 131072,
18
+ "model_type": "cohere",
19
+ "num_attention_heads": 64,
20
+ "num_hidden_layers": 40,
21
+ "num_key_value_heads": 64,
22
+ "pad_token_id": 0,
23
+ "pretraining_tp": 1,
24
+ "quantization_config": {
25
+ "bits": 4,
26
+ "damp_percent": 0.1,
27
+ "desc_act": true,
28
+ "group_size": 32,
29
+ "is_marlin_format": false,
30
+ "model_file_base_name": "gptq_model-4bit-32g",
31
+ "model_name_or_path": "command-r-gptq",
32
+ "quant_method": "gptq",
33
+ "static_groups": false,
34
+ "sym": true,
35
+ "true_sequential": true
36
+ },
37
+ "rope_theta": 8000000.0,
38
+ "torch_dtype": "float16",
39
+ "transformers_version": "4.39.1",
40
+ "use_cache": true,
41
+ "vocab_size": 256000
42
+ }
gptq_model-4bit-32g.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd07528e1fce56acc4f5d3d0867c8ca975a995c81550d5fd84087ec0c61dcff
3
+ size 27411564808
quantize_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 32,
4
+ "damp_percent": 0.1,
5
+ "desc_act": true,
6
+ "static_groups": false,
7
+ "sym": true,
8
+ "true_sequential": true,
9
+ "model_name_or_path": "command-r-gptq",
10
+ "model_file_base_name": "gptq_model-4bit-32g",
11
+ "is_marlin_format": false,
12
+ "quant_method": "gptq"
13
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<BOS_TOKEN>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|END_OF_TURN_TOKEN|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<PAD>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenization_cohere_fast.py ADDED
@@ -0,0 +1,754 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 Cohere and The HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # This file is based on the tokenization_llama_fast.py file in transformers
17
+
18
+
19
+ import os
20
+ from shutil import copyfile
21
+ from typing import Optional, Tuple, Dict, Union, List, Literal
22
+
23
+ from tokenizers import processors
24
+ from transformers import AutoTokenizer
25
+ from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
26
+ from transformers.utils import logging
27
+ from transformers.utils.versions import require_version
28
+ from transformers.tokenization_utils_base import TensorType
29
+ from transformers.pipelines.conversational import Conversation
30
+
31
+ from .configuration_cohere import CohereConfig
32
+
33
+ require_version("tokenizers>=0.13.3")
34
+
35
+ logger = logging.get_logger(__name__)
36
+ VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.json"}
37
+
38
+ PRETRAINED_VOCAB_FILES_MAP = {
39
+ "vocab_file": {
40
+ "cohere-tokenizer": "https://huggingface.co/Cohere/Command-nightly/blob/main/tokenizer.json",
41
+ },
42
+ }
43
+
44
+ # fmt: off
45
+ DEFAULT_SYSTEM_PROMPT = "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere."
46
+ DEFAULT_RAG_PREAMBLE = """## Task and Context
47
+ You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.
48
+
49
+ ## Style Guide
50
+ Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling."""
51
+ # fmt: on
52
+
53
+
54
+ class CohereTokenizerFast(PreTrainedTokenizerFast):
55
+ """
56
+ Construct a Cohere tokenizer. Based on byte-level Byte-Pair-Encoding.
57
+
58
+ This uses notably ByteFallback and NFC normalization.
59
+
60
+ ```python
61
+ >>> from transformers import AutoTokenizer
62
+
63
+ >>> tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
64
+ >>> tokenizer.encode("Hello this is a test")
65
+ [1, 15043, 445, 338, 263, 1243]
66
+ ```
67
+
68
+ If you want to change the `bos_token` or the `eos_token`, make sure to specify them when initializing the model, or
69
+ call `tokenizer.update_post_processor()` to make sure that the post-processing is correctly done (otherwise the
70
+ values of the first token and final token of an encoded sequence will not be correct). For more details, check out
71
+ the [post-processors](https://huggingface.co/docs/tokenizers/api/post-processors) documentation.
72
+
73
+
74
+ This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
75
+ refer to this superclass for more information regarding those methods.
76
+
77
+ Args:
78
+ vocab_file (`str`, *optional*):
79
+ [SentencePiece](https://github.com/google/sentencepiece) file (generally has a .model extension) that
80
+ contains the vocabulary necessary to instantiate a tokenizer.
81
+ tokenizer_file (`str`, *optional*):
82
+ [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
83
+ contains everything needed to load the tokenizer.
84
+ clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
85
+ Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
86
+ extra spaces.
87
+ unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<UNK>"`):
88
+ The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
89
+ token instead.
90
+ bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<BOS_TOKEN>"`):
91
+ The beginning of sequence token that was used during pretraining. Can be used as a sequence classifier token.
92
+ eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<EOS_TOKEN>"`):
93
+ The end of sequence token.
94
+ add_bos_token (`bool`, *optional*, defaults to `True`):
95
+ Whether or not to add a `bos_token` at the start of sequences.
96
+ add_eos_token (`bool`, *optional*, defaults to `False`):
97
+ Whether or not to add an `eos_token` at the end of sequences.
98
+ use_default_system_prompt (`bool`, *optional*, defaults to `False`):
99
+ Whether or not the default system prompt for Cohere tokenizer should be used.
100
+ add_prefix_space (`bool`, *optional*):
101
+ Whether or not the tokenizer should automatically add a prefix space
102
+ """
103
+
104
+ vocab_files_names = VOCAB_FILES_NAMES
105
+ padding_side = "left"
106
+ model_input_names = ["input_ids", "attention_mask"]
107
+
108
+ def __init__(
109
+ self,
110
+ vocab_file=None,
111
+ tokenizer_file=None,
112
+ clean_up_tokenization_spaces=False,
113
+ unk_token="<UNK>",
114
+ bos_token="<BOS_TOKEN>",
115
+ eos_token="<EOS_TOKEN>",
116
+ add_bos_token=True,
117
+ add_eos_token=False,
118
+ use_default_system_prompt=False,
119
+ add_prefix_space=None,
120
+ **kwargs,
121
+ ):
122
+ if add_prefix_space is not None:
123
+ logger.warning_once(
124
+ "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers"
125
+ )
126
+ kwargs["from_slow"] = True
127
+
128
+ super().__init__(
129
+ vocab_file=vocab_file,
130
+ tokenizer_file=tokenizer_file,
131
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
132
+ unk_token=unk_token,
133
+ bos_token=bos_token,
134
+ eos_token=eos_token,
135
+ add_bos_token=add_bos_token,
136
+ add_eos_token=add_eos_token,
137
+ use_default_system_prompt=use_default_system_prompt,
138
+ **kwargs,
139
+ )
140
+ self._add_bos_token = add_bos_token
141
+ self._add_eos_token = add_eos_token
142
+ self.update_post_processor()
143
+ self.use_default_system_prompt = use_default_system_prompt
144
+ self.vocab_file = vocab_file
145
+ self.grounded_generation_template = kwargs.pop("grounded_generation_template", None)
146
+ self.tool_use_template = kwargs.pop("tool_use_template", None)
147
+
148
+ def update_post_processor(self):
149
+ """
150
+ Updates the underlying post processor with the current `bos_token` and `eos_token`.
151
+ """
152
+ bos = self.bos_token
153
+ bos_token_id = self.bos_token_id
154
+ if bos is None and self.add_bos_token:
155
+ raise ValueError("add_bos_token = True but bos_token = None")
156
+
157
+ eos = self.eos_token
158
+ eos_token_id = self.eos_token_id
159
+ if eos is None and self.add_eos_token:
160
+ raise ValueError("add_eos_token = True but eos_token = None")
161
+
162
+ single = f"{(bos+':0 ') if self.add_bos_token else ''}$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
163
+ pair = f"{single}{(' '+bos+':1') if self.add_bos_token else ''} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"
164
+
165
+ special_tokens = []
166
+ if self.add_bos_token:
167
+ special_tokens.append((bos, bos_token_id))
168
+ if self.add_eos_token:
169
+ special_tokens.append((eos, eos_token_id))
170
+ self._tokenizer.post_processor = processors.TemplateProcessing(
171
+ single=single, pair=pair, special_tokens=special_tokens
172
+ )
173
+
174
+ @property
175
+ def add_eos_token(self):
176
+ return self._add_eos_token
177
+
178
+ @property
179
+ def add_bos_token(self):
180
+ return self._add_bos_token
181
+
182
+ @add_eos_token.setter
183
+ def add_eos_token(self, value):
184
+ self._add_eos_token = value
185
+ self.update_post_processor()
186
+
187
+ @add_bos_token.setter
188
+ def add_bos_token(self, value):
189
+ self._add_bos_token = value
190
+ self.update_post_processor()
191
+
192
+ @property
193
+ def default_chat_template(self):
194
+ """
195
+ Cohere Tokenizer uses <|START_OF_TURN_TOKEN|> and <|END_OF_TURN_TOKEN|> to indicate each turn in a chat.
196
+ Additionally, to indicate the source of the message, <|USER_TOKEN|>, <|CHATBOT_TOKEN|> and <|SYSTEM_TOKEN|> are used
197
+ for user, assistant and system messages respectively.
198
+
199
+ The output should look something like:
200
+ <BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ preamble }}<|END_OF_TURN_TOKEN|>
201
+ <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ How are you? }}<|END_OF_TURN_TOKEN|>
202
+ <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{ I am doing well! }}<|END_OF_TURN_TOKEN|>
203
+
204
+ Use add_generation_prompt to add a prompt for the model to generate a response:
205
+
206
+ >>> messages = [{"role": "user", "content": "Hello, how are you?"}]
207
+ >>> tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
208
+ <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
209
+
210
+ """
211
+ logger.warning_once(
212
+ "\nNo chat template is defined for this tokenizer - using the default template "
213
+ f"for the {self.__class__.__name__} class. If the default is not appropriate for "
214
+ "your model, please set `tokenizer.chat_template` to an appropriate template. "
215
+ "See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n"
216
+ )
217
+ template = (
218
+ "{{ bos_token }}"
219
+ "{% if messages[0]['role'] == 'system' %}"
220
+ "{% set loop_messages = messages[1:] %}" # Extract system message if it's present
221
+ "{% set system_message = messages[0]['content'] %}"
222
+ "{% elif USE_DEFAULT_PROMPT == true %}"
223
+ "{% set loop_messages = messages %}" # Or use the default system message if the flag is set
224
+ "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
225
+ "{% else %}"
226
+ "{% set loop_messages = messages %}"
227
+ "{% set system_message = false %}"
228
+ "{% endif %}"
229
+ "{% if system_message != false %}" # Start with system message
230
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}"
231
+ "{% endif %}"
232
+ "{% for message in loop_messages %}" # Loop over all non-system messages
233
+ "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
234
+ "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
235
+ "{% endif %}"
236
+ "{% set content = message['content'] %}"
237
+ "{% if message['role'] == 'user' %}" # After all of that, handle messages/roles in a fairly normal way
238
+ "{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
239
+ "{% elif message['role'] == 'assistant' %}"
240
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
241
+ "{% endif %}"
242
+ "{% endfor %}"
243
+ "{% if add_generation_prompt %}"
244
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}"
245
+ "{% endif %}"
246
+ )
247
+ template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
248
+ default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
249
+ template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)
250
+
251
+ return template
252
+
253
+ @property
254
+ def default_tool_use_template(self):
255
+ template = (
256
+ "{{ bos_token }}"
257
+ "{% if messages[0]['role'] == 'system' %}"
258
+ "{% set loop_messages = messages[1:] %}" # Extract system message if it's present
259
+ "{% set system_message = messages[0]['content'] %}"
260
+ "{% else %}"
261
+ "{% set loop_messages = messages %}"
262
+ "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
263
+ "{% endif %}"
264
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}"
265
+ "{{ '# Safety Preamble' }}"
266
+ "{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}"
267
+ "{{ '\n\n# System Preamble' }}"
268
+ "{{ '\n## Basic Rules' }}"
269
+ "{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' }}"
270
+ "{{ '\n\n# User Preamble' }}"
271
+ "{{ '\n' + system_message }}"
272
+ "{{'\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n'}}"
273
+ "{% for tool in tools %}"
274
+ "{% if loop.index0 != 0 %}"
275
+ "{{ '\n\n'}}"
276
+ "{% endif %}"
277
+ "{{'```python\ndef ' + tool.name + '('}}"
278
+ "{% for param_name, param_fields in tool.parameter_definitions.items() %}"
279
+ "{% if loop.index0 != 0 %}"
280
+ "{{ ', '}}"
281
+ "{% endif %}"
282
+ "{{param_name}}: "
283
+ "{% if not param_fields.required %}"
284
+ "{{'Optional[' + param_fields.type + '] = None'}}"
285
+ "{% else %}"
286
+ "{{ param_fields.type }}"
287
+ "{% endif %}"
288
+ "{% endfor %}"
289
+ "{{ ') -> List[Dict]:\n \"\"\"'}}"
290
+ "{{ tool.description }}"
291
+ "{% if tool.parameter_definitions|length != 0 %}"
292
+ "{{ '\n\n Args:\n '}}"
293
+ "{% for param_name, param_fields in tool.parameter_definitions.items() %}"
294
+ "{% if loop.index0 != 0 %}"
295
+ "{{ '\n ' }}"
296
+ "{% endif %}"
297
+ "{{ param_name + ' ('}}"
298
+ "{% if not param_fields.required %}"
299
+ "{{'Optional[' + param_fields.type + ']'}}"
300
+ "{% else %}"
301
+ "{{ param_fields.type }}"
302
+ "{% endif %}"
303
+ "{{ '): ' + param_fields.description }}"
304
+ "{% endfor %}"
305
+ "{% endif %}"
306
+ "{{ '\n \"\"\"\n pass\n```' }}"
307
+ "{% endfor %}"
308
+ "{{ '<|END_OF_TURN_TOKEN|>'}}"
309
+ "{% for message in loop_messages %}"
310
+ "{% set content = message['content'] %}"
311
+ "{% if message['role'] == 'user' %}"
312
+ "{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
313
+ "{% elif message['role'] == 'system' %}"
314
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
315
+ "{% elif message['role'] == 'assistant' %}"
316
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
317
+ "{% endif %}"
318
+ "{% endfor %}"
319
+ "{{'<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \\'Action:\\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n {\n \"tool_name\": title of the tool in the specification,\n \"parameters\": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n }\n]```<|END_OF_TURN_TOKEN|>'}}"
320
+ "{% if add_generation_prompt %}"
321
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}"
322
+ "{% endif %}"
323
+ )
324
+ default_message = DEFAULT_RAG_PREAMBLE.replace("\n", "\\n").replace("'", "\\'")
325
+ template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)
326
+ return template
327
+
328
+ @property
329
+ def default_grounded_generation_template(self):
330
+ template = (
331
+ "{{ bos_token }}"
332
+ "{% if messages[0]['role'] == 'system' %}"
333
+ "{% set loop_messages = messages[1:] %}" # Extract system message if it's present
334
+ "{% set system_message = messages[0]['content'] %}"
335
+ "{% else %}"
336
+ "{% set loop_messages = messages %}"
337
+ "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
338
+ "{% endif %}"
339
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}"
340
+ "{{ '# Safety Preamble' }}"
341
+ "{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}"
342
+ "{{ '\n\n# System Preamble' }}"
343
+ "{{ '\n## Basic Rules' }}"
344
+ "{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' }}"
345
+ "{{ '\n\n# User Preamble' }}"
346
+ "{{ '\n' + system_message }}"
347
+ "{{ '<|END_OF_TURN_TOKEN|>'}}"
348
+ "{% for message in loop_messages %}" # Loop over all non-system messages
349
+ "{% set content = message['content'] %}"
350
+ "{% if message['role'] == 'user' %}" # After all of that, handle messages/roles in a fairly normal way
351
+ "{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
352
+ "{% elif message['role'] == 'system' %}"
353
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
354
+ "{% elif message['role'] == 'assistant' %}"
355
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}"
356
+ "{% endif %}"
357
+ "{% endfor %}"
358
+ "{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>'}}"
359
+ "{{ '<results>' }}"
360
+ "{% for document in documents %}" # Loop over all retrieved documents
361
+ "{{ '\nDocument: ' }}"
362
+ "{{ loop.index0 }}\n"
363
+ "{% for key, value in document.items() %}"
364
+ "{{ key }}: {{value}}\n"
365
+ "{% endfor %}"
366
+ "{% endfor %}"
367
+ "{{ '</results>'}}"
368
+ "{{ '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}"
369
+ "{{ 'Carefully perform the following instructions, in order, starting each with a new line.\n' }}"
370
+ "{{ 'Firstly, Decide which of the retrieved documents are relevant to the user\\'s last input by writing \\'Relevant Documents:\\' followed by comma-separated list of document numbers. If none are relevant, you should instead write \\'None\\'.\n' }}"
371
+ "{{ 'Secondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user\\'s last input by writing \\'Cited Documents:\\' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write \\'None\\'.\n' }}"
372
+ "{% if citation_mode=='accurate' %}"
373
+ "{{ 'Thirdly, Write \\'Answer:\\' followed by a response to the user\\'s last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.\n' }}"
374
+ "{% endif %}"
375
+ "{{ 'Finally, Write \\'Grounded answer:\\' followed by a response to the user\\'s last input in high quality natural english. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.' }}"
376
+ "{{ '<|END_OF_TURN_TOKEN|>' }}"
377
+ "{% if add_generation_prompt %}"
378
+ "{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}"
379
+ "{% endif %}"
380
+ )
381
+ default_message = DEFAULT_RAG_PREAMBLE.replace("\n", "\\n").replace("'", "\\'")
382
+ template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)
383
+ return template
384
+
385
+ def _apply_template_with_arguments(
386
+ self,
387
+ conversation: Union[List[Dict[str, str]], "Conversation"],
388
+ template: Optional[str] = None,
389
+ add_generation_prompt: bool = False,
390
+ tokenize: bool = True,
391
+ padding: bool = False,
392
+ truncation: bool = False,
393
+ max_length: Optional[int] = None,
394
+ return_tensors: Optional[Union[str, TensorType]] = None,
395
+ return_dict: bool = False,
396
+ **kwargs,
397
+ ) -> Union[str, List[int]]:
398
+ """Just tokenization_utils_base.apply_chat_template, but modified so that the Jinja template can take kwargs"""
399
+ if hasattr(conversation, "messages"):
400
+ # Indicates it's a Conversation object
401
+ conversation = conversation.messages
402
+
403
+ # Compilation function uses a cache to avoid recompiling the same template
404
+ compiled_template = self._compile_jinja_template(template)
405
+
406
+ rendered = compiled_template.render(
407
+ messages=conversation,
408
+ add_generation_prompt=add_generation_prompt,
409
+ **kwargs,
410
+ **self.special_tokens_map
411
+ )
412
+
413
+ if padding is True:
414
+ padding = "max_length" # There's only one sequence here, so "longest" makes no sense
415
+ if tokenize:
416
+ if return_dict:
417
+ return self(
418
+ rendered,
419
+ padding=padding,
420
+ truncation=truncation,
421
+ max_length=max_length,
422
+ add_special_tokens=False,
423
+ return_tensors=return_tensors,
424
+ **kwargs,
425
+ )
426
+ else:
427
+ return self.encode(
428
+ rendered,
429
+ padding=padding,
430
+ truncation=truncation,
431
+ max_length=max_length,
432
+ add_special_tokens=False,
433
+ return_tensors=return_tensors,
434
+ **kwargs,
435
+ )
436
+ else:
437
+ return rendered
438
+
439
+ def apply_tool_use_template(
440
+ self,
441
+ conversation: Union[List[Dict[str, str]], "Conversation"],
442
+ tools: List[Dict],
443
+ tool_use_template: Optional[str] = None,
444
+ **kwargs
445
+ ) -> Union[str, List[int]]:
446
+ """Create a Command-R tool-use prompt.
447
+
448
+ Once rendered, the prompt instructs the model to generate a list of actions to perform on a set of user supplied tools
449
+ to help carry out the user's requests.
450
+
451
+ Conceptually, this works in the same way as `apply_chat_template`, but takes an additional `tools` parameter.
452
+
453
+ Converts a Conversation object or a list of dictionaries with `"role"` and `"content"` keys and a list of available
454
+ tools for the model to use into a prompt string, or a list of token ids.
455
+ This method will use the tokenizer's `default_tool_use_template` template specified at the class level.
456
+ You can override the default template using the `tool_use_template` kwarg but the quality of your results may decrease.
457
+
458
+ Args:
459
+ conversation (Union[List[Dict[str, str]], "Conversation"]): A Conversation object or list of dicts
460
+ with "role" and "content" keys, representing the chat history so far.
461
+ tools (List[Dict]): a list of tools to render into the prompt for the model to choose from.
462
+ See an example at the bottom of the docstring.
463
+ The format should be:
464
+ * name (str): The name of the tool to be called. Valid names contain only the characters a-z,
465
+ A-Z, 0-9, _ and must not begin with a digit.
466
+ * description (str): The description of what the tool does, the model uses the description to
467
+ choose when and how to call the function.
468
+ * parameter_definitions (Dict[str, Dict]): The input parameters of the tool. Accepts a dictionary
469
+ where the key is the name of the parameter and the value is the parameter spec.
470
+ Valid parameter names contain only the characters a-z, A-Z, 0-9, _ and must not begin with a digit.
471
+ Parameter specs are as follows:
472
+ * description (str): The description of the parameter.
473
+ * type (str): the type of the parameter - most effective for python builtin data types, such as 'str', 'bool'
474
+ * required: boolean: Denotes whether the parameter is always present (required) or not. Defaults to not required.
475
+ tool_use_template (str, *optional*): A Jinja template to use for this conversion. If
476
+ this is not passed, the model's default chat template will be used instead.
477
+ add_generation_prompt (bool, *optional*): Whether to end the prompt with the token(s) that indicate
478
+ the start of an assistant message. This is useful when you want to generate a response from the model.
479
+ Note that this argument will be passed to the chat template, and so it must be supported in the
480
+ template for this argument to have any effect.
481
+ tokenize (`bool`, defaults to `True`):
482
+ Whether to tokenize the output. If `False`, the output will be a string.
483
+ padding (`bool`, defaults to `False`):
484
+ Whether to pad sequences to the maximum length. Has no effect if tokenize is `False`.
485
+ truncation (`bool`, defaults to `False`):
486
+ Whether to truncate sequences at the maximum length. Has no effect if tokenize is `False`.
487
+ max_length (`int`, *optional*):
488
+ Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is `False`. If
489
+ not specified, the tokenizer's `max_length` attribute will be used as a default.
490
+ return_tensors (`str` or [`~utils.TensorType`], *optional*):
491
+ If set, will return tensors of a particular framework. Has no effect if tokenize is `False`. Acceptable
492
+ values are:
493
+ - `'tf'`: Return TensorFlow `tf.Tensor` objects.
494
+ - `'pt'`: Return PyTorch `torch.Tensor` objects.
495
+ - `'np'`: Return NumPy `np.ndarray` objects.
496
+ - `'jax'`: Return JAX `jnp.ndarray` objects.
497
+ return_dict (`bool`, *optional*, defaults to `False`):
498
+ Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
499
+ **tokenizer_kwargs: Additional kwargs to pass to the tokenizer.
500
+
501
+ Returns:
502
+ `str`: A rendered prompt string.
503
+ or if tokenize=True:
504
+ `List[int]`: A list of token ids representing the tokenized chat so far, including control tokens. This
505
+ output is ready to pass to the model, either directly or via methods like `generate()`.
506
+
507
+ Examples:
508
+
509
+ ```python
510
+ >>> tokenizer = CohereTokenizerFast.from_pretrained("CohereForAI/c4ai-command-r-v01")
511
+ >>> tools = [
512
+ {
513
+ "name": "internet_search",
514
+ "description": "Returns a list of relevant document snippets for a textual query retrieved from the internet",
515
+ "parameter_definitions": {
516
+ "query": {
517
+ "description": "Query to search the internet with",
518
+ "type": "str",
519
+ "required": True
520
+ }
521
+ }
522
+ },
523
+ {
524
+ "name": "directly_answer",
525
+ "description": "Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history",
526
+ "parameter_definitions": {}
527
+ }
528
+ ]
529
+ >>> conversation = [
530
+ {"role": "user", "content": "Whats the biggest penguin in the world?"}
531
+ ]
532
+ >>> # render the prompt, ready for user to inspect, or for input into the model:
533
+ >>> prompt = tokenizer.apply_tool_use_template(
534
+ conversation,
535
+ tools=tools,
536
+ tokenize=False,
537
+ add_generation_prompt=True,
538
+ )
539
+ >>> print(prompt)
540
+ <BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
541
+ The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
542
+
543
+ # System Preamble
544
+ ## Basic Rules
545
+ You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
546
+
547
+ # User Preamble
548
+ ## Task and Context
549
+ You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.
550
+
551
+ ## Style Guide
552
+ Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
553
+
554
+ ## Available Tools
555
+ Here is a list of tools that you have available to you:
556
+
557
+ \`\`\`python
558
+ def internet_search(query: str) -> List[Dict]:
559
+ \"\"\"Returns a list of relevant document snippets for a textual query retrieved from the internet
560
+
561
+ Args:
562
+ query (str): Query to search the internet with
563
+ \"\"\"
564
+ pass
565
+ \`\`\`
566
+
567
+ \`\`\`python
568
+ def directly_answer() -> List[Dict]:
569
+ \"\"\"Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history
570
+ \"\"\"
571
+ pass
572
+ \`\`\`<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write 'Action:' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user's last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:
573
+ \`\`\`json
574
+ [
575
+ {
576
+ "tool_name": title of the tool in the specification,
577
+ "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters
578
+ }
579
+ ]\`\`\`<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
580
+ ```
581
+ >>> inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors='pt')
582
+ >>> outputs = model.generate(inputs, max_new_tokens=128)
583
+ >>> print(tokenizer.decode(outputs[0]))
584
+ Action: ```json
585
+ [
586
+ {
587
+ "tool_name": "internet_search",
588
+ "parameters": {
589
+ "query": "biggest penguin in the world"
590
+ }
591
+ }
592
+ ]
593
+ ```
594
+ """
595
+ # priority: `tool_use_template` argument > `tokenizer.tool_use_template` > `tokenizer.default_tool_use_template`
596
+ if tool_use_template is None:
597
+ if self.tool_use_template is not None:
598
+ tool_use_template = self.tool_use_template
599
+ else:
600
+ tool_use_template = self.default_tool_use_template
601
+
602
+ return self._apply_template_with_arguments(
603
+ conversation,
604
+ tools=tools,
605
+ template=tool_use_template,
606
+ **kwargs,
607
+ )
608
+
609
+ def apply_grounded_generation_template(
610
+ self,
611
+ conversation: Union[List[Dict[str, str]], "Conversation"],
612
+ documents: List[Dict],
613
+ citation_mode: Literal["fast", "accurate"] = "accurate",
614
+ grounded_generation_template: Optional[str] = None,
615
+ **kwargs
616
+ ) -> Union[str, List[int]]:
617
+ """Create a Command-R grounded generation (aka RAG) prompt.
618
+
619
+ Once rendered, the prompt instructs the model to generate a response with citations in, based on supplied documents.
620
+
621
+ Conceptually, this works in the same way as `apply_chat_template`, but takes additional `documents`
622
+ and `citation_mode` parameters.
623
+
624
+ Converts a Conversation object or a list of dictionaries with `"role"` and `"content"` keys and a list of
625
+ documents for the model to ground its response on into a prompt string, or a list of token ids.
626
+ This method will use the tokenizer's `grounded_generation_template` template specified at the class level.
627
+ You can override the default template using the `grounded_generation_template` kwarg but the quality of your results may decrease.
628
+
629
+ Args:
630
+ conversation (Union[List[Dict[str, str]], "Conversation"]): A Conversation object or list of dicts
631
+ with "role" and "content" keys, representing the chat history so far.
632
+ documents (List[Dict[str, str]]): A list of dicts, representing documents or tool outputs to ground your
633
+ generation on. A document is a semistructured dict, with a string to string mapping. Common fields are
634
+ `url`, `title`, `snippet` etc but should be descriptive of the key. They will get rendered into the prompt.
635
+ citation_mode: either "accurate" (prompt the model to generate an answer first, then rewrite it with citation
636
+ spans in) or "fast", where the prompt instructs the model to generate an answer with citations in directly.
637
+ The former has higher quality citations, the latter requires fewer tokens to be generated.
638
+ grounded_generation_template (str, *optional*): A Jinja template to use for this conversion. If
639
+ this is not passed, the model's default grounded_generation_template template will be used instead.
640
+ add_generation_prompt (bool, *optional*): Whether to end the prompt with the token(s) that indicate
641
+ the start of an assistant message. This is useful when you want to generate a response from the model.
642
+ Note that this argument will be passed to the chat template, and so it must be supported in the
643
+ template for this argument to have any effect.
644
+ tokenize (`bool`, defaults to `True`):
645
+ Whether to tokenize the output. If `False`, the output will be a string.
646
+ padding (`bool`, defaults to `False`):
647
+ Whether to pad sequences to the maximum length. Has no effect if tokenize is `False`.
648
+ truncation (`bool`, defaults to `False`):
649
+ Whether to truncate sequences at the maximum length. Has no effect if tokenize is `False`.
650
+ max_length (`int`, *optional*):
651
+ Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is `False`. If
652
+ not specified, the tokenizer's `max_length` attribute will be used as a default.
653
+ return_tensors (`str` or [`~utils.TensorType`], *optional*):
654
+ If set, will return tensors of a particular framework. Has no effect if tokenize is `False`. Acceptable
655
+ values are:
656
+ - `'tf'`: Return TensorFlow `tf.Tensor` objects.
657
+ - `'pt'`: Return PyTorch `torch.Tensor` objects.
658
+ - `'np'`: Return NumPy `np.ndarray` objects.
659
+ - `'jax'`: Return JAX `jnp.ndarray` objects.
660
+ return_dict (`bool`, *optional*, defaults to `False`):
661
+ Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
662
+ **tokenizer_kwargs: Additional kwargs to pass to the tokenizer.
663
+
664
+ Returns:
665
+ `str`: A rendered prompt string.
666
+ or if tokenize=True:
667
+ `List[int]`: A list of token ids representing the tokenized chat so far, including control tokens. This
668
+ output is ready to pass to the model, either directly or via methods like `generate()`.
669
+
670
+ Examples:
671
+
672
+ ```python
673
+ >>> tokenizer = CohereTokenizerFast.from_pretrained('CohereForAI/c4ai-command-r-v01')
674
+
675
+ >>> # define documents:
676
+ >>> documents = [
677
+ { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
678
+ { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica."}
679
+ ]
680
+ >>> # define a conversation:
681
+ >>> conversation = [
682
+ {"role": "user", "content": "Whats the biggest penguin in the world?"}
683
+ ]
684
+ >>> # render the prompt, ready for user to inspect, or for input into the model:
685
+ >>> grounded_generation_prompt = tokenizer.apply_grounded_generation_template(
686
+ conversation,
687
+ documents=documents,
688
+ tokenize=False,
689
+ add_generation_prompt=True,
690
+ )
691
+ >>> print(grounded_generation_prompt)
692
+ <BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble
693
+ The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.
694
+
695
+ ## Basic Rules
696
+ You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.
697
+
698
+ # User Preamble
699
+ ## Task and Context
700
+ You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.
701
+
702
+ ## Style Guide
703
+ Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
704
+ Document: 0
705
+ title: Tall penguins
706
+ text: Emperor penguins are the tallest.
707
+
708
+ Document: 1
709
+ title: Penguin habitats
710
+ text: Emperor penguins only live in Antarctica.
711
+ </results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Carefully perform the following instructions, in order, starting each with a new line.
712
+ Firstly, Decide which of the retrieved documents are relevant to the user's last input by writing 'Relevant Documents:' followed by comma-separated list of document numbers. If none are relevant, you should instead write 'None'.
713
+ Secondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user's last input by writing 'Cited Documents:' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write 'None'.
714
+ Thirdly, Write 'Answer:' followed by a response to the user's last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.
715
+ Finally, Write 'Grounded answer:' followed by a response to the user's last input in high quality natural english. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
716
+ ```
717
+ >>> inputs = tokenizer.encode(grounded_generation_prompt, add_special_tokens=False, return_tensors='pt')
718
+ >>> outputs = model.generate(inputs, max_new_tokens=128)
719
+ >>> print(tokenizer.decode(outputs[0]))
720
+ Relevant Documents: 0,1
721
+ Cited Documents: 0,1
722
+ Answer: The Emperor Penguin is the tallest or biggest penguin in the world. It is a bird that lives only in Antarctica and grows to a height of around 122 centimetres.
723
+ Grounded answer: The <co: 0>Emperor Penguin</co: 0> is the <co: 0>tallest</co: 0> or biggest penguin in the world. It is a bird that <co: 1>lives only in Antarctica</co: 1> and <co: 0>grows to a height of around 122 centimetres.</co: 0>
724
+ """
725
+ # priority: `grounded_generation_template` argument > `tokenizer.grounded_generation_template` > `tokenizer.default_grounded_generation_template`
726
+ if grounded_generation_template is None:
727
+ if self.grounded_generation_template is not None:
728
+ grounded_generation_template = self.grounded_generation_template
729
+ else:
730
+ grounded_generation_template = self.default_grounded_generation_template
731
+
732
+ return self._apply_template_with_arguments(
733
+ conversation,
734
+ documents=documents,
735
+ template=grounded_generation_template,
736
+ citation_mode=citation_mode,
737
+ **kwargs,
738
+ )
739
+
740
+ # TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
741
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
742
+ bos_token_id = [self.bos_token_id] if self.add_bos_token else []
743
+ eos_token_id = [self.eos_token_id] if self.add_eos_token else []
744
+
745
+ output = bos_token_id + token_ids_0 + eos_token_id
746
+
747
+ if token_ids_1 is not None:
748
+ output = output + bos_token_id + token_ids_1 + eos_token_id
749
+
750
+ return output
751
+
752
+
753
+ # register the tokenizer to AutoTokenizer
754
+ AutoTokenizer.register(CohereConfig, fast_tokenizer_class=CohereTokenizerFast)
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af6e6fe50ce1bb5611b103482de6bac000c82e06898138d57f35af121aec772
3
+ size 12777406
tokenizer_config.json ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<PAD>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<UNK>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "<CLS>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "<SEP>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "4": {
38
+ "content": "<MASK_TOKEN>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "5": {
46
+ "content": "<BOS_TOKEN>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "6": {
54
+ "content": "<EOS_TOKEN>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "7": {
62
+ "content": "<EOP_TOKEN>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "255000": {
70
+ "content": "<|START_OF_TURN_TOKEN|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": false
76
+ },
77
+ "255001": {
78
+ "content": "<|END_OF_TURN_TOKEN|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": false
84
+ },
85
+ "255002": {
86
+ "content": "<|YES_TOKEN|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": false
92
+ },
93
+ "255003": {
94
+ "content": "<|NO_TOKEN|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": false
100
+ },
101
+ "255004": {
102
+ "content": "<|GOOD_TOKEN|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": false
108
+ },
109
+ "255005": {
110
+ "content": "<|BAD_TOKEN|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": false
116
+ },
117
+ "255006": {
118
+ "content": "<|USER_TOKEN|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "255007": {
126
+ "content": "<|CHATBOT_TOKEN|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "255008": {
134
+ "content": "<|SYSTEM_TOKEN|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "255009": {
142
+ "content": "<|USER_0_TOKEN|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "255010": {
150
+ "content": "<|USER_1_TOKEN|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "255011": {
158
+ "content": "<|USER_2_TOKEN|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "255012": {
166
+ "content": "<|USER_3_TOKEN|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "255013": {
174
+ "content": "<|USER_4_TOKEN|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "255014": {
182
+ "content": "<|USER_5_TOKEN|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "255015": {
190
+ "content": "<|USER_6_TOKEN|>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "255016": {
198
+ "content": "<|USER_7_TOKEN|>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "255017": {
206
+ "content": "<|USER_8_TOKEN|>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ },
213
+ "255018": {
214
+ "content": "<|USER_9_TOKEN|>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": false
220
+ },
221
+ "255019": {
222
+ "content": "<|EXTRA_0_TOKEN|>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": false
228
+ },
229
+ "255020": {
230
+ "content": "<|EXTRA_1_TOKEN|>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": false
236
+ },
237
+ "255021": {
238
+ "content": "<|EXTRA_2_TOKEN|>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": false
244
+ },
245
+ "255022": {
246
+ "content": "<|EXTRA_3_TOKEN|>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": false
252
+ },
253
+ "255023": {
254
+ "content": "<|EXTRA_4_TOKEN|>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": false
260
+ },
261
+ "255024": {
262
+ "content": "<|EXTRA_5_TOKEN|>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": false
268
+ },
269
+ "255025": {
270
+ "content": "<|EXTRA_6_TOKEN|>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": false
276
+ },
277
+ "255026": {
278
+ "content": "<|EXTRA_7_TOKEN|>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": false
284
+ },
285
+ "255027": {
286
+ "content": "<|EXTRA_8_TOKEN|>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": false
292
+ },
293
+ "255028": {
294
+ "content": "<|EXTRA_9_TOKEN|>",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": false
300
+ }
301
+ },
302
+ "auto_map": {
303
+ "AutoTokenizer": [
304
+ null,
305
+ "tokenization_cohere_fast.CohereTokenizerFast"
306
+ ]
307
+ },
308
+ "bos_token": "<BOS_TOKEN>",
309
+ "clean_up_tokenization_spaces": false,
310
+ "eos_token": "<|END_OF_TURN_TOKEN|>",
311
+ "legacy": true,
312
+ "model_max_length": 1000000000000000019884624838656,
313
+ "pad_token": "<PAD>",
314
+ "sp_model_kwargs": {},
315
+ "spaces_between_special_tokens": false,
316
+ "tokenizer_class": "CohereTokenizer",
317
+ "unk_token": null,
318
+ "use_default_system_prompt": false
319
+ }