salamandra-7b-instruct / tokenizer_config.json
mapama247
add tokenizer files
fc0cdc5
raw
history blame
2.38 kB
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>"
],
"bos_token": "<s>",
"chat_template": "{%- if not date_string is defined %}{%- set date_string = \"2024-09-30\" %}{%- endif %}{{ \"<|im_start|>system\nYou are Salamandra, an AI language model developed at the Barcelona Supercomputing Centre (BSC) by the Language Technologies Unit. Your knowledge base was last updated on August 2023. Today Date: \"+ date_string +\"\nEres Salamandra, un modelo lingüístico de IA desarrollado en el Barcelona Supercomputing Centre (BSC) por la Language Technologies Unit. Tu base de conocimientos se actualizó por última vez en agosto de 2023.\nEts Salamandra, un model de llenguatge d'IA desenvolupat al Barcelona Supercomputing Centre (BSC) per la Language Technologies Unit. La teva base de coneixement es va actualitzar per última vegada l'agost de 2023.<|im_end|>\n\" }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 8192,
"pad_token": "<unk>",
"padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}