|
from transformers import AutoTokenizer |
|
# Demo: load the Monstral tokenizer and render a sample multi-turn
# conversation through its chat template (downloads on first run).
tokenizer = AutoTokenizer.from_pretrained("MarsupialAI/Monstral-123B-v2")

# Sample conversation used to exercise the template.
chat = [
    {"role": "system", "content": "3525265246346?"},
    {"role": "user", "content": "Hello, how are you?I'm doing great. How can I help you today?I'm doing great. How can I help you today?I'm doing great. How can I help you today?I'm doing great. How can I help you today?I'm doing great. How can I help you today?"},
    {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
    {"role": "user", "content": "I'd like to show off how chat templating works!"},
]

# Render as text (not token ids) with the generation prompt appended.
rendered = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print(rendered)
|
def apply_chat_template_with_length_limit(tokenizer, conversations, max_length, chat_template=None):
    """
    Render a chat template using only a prefix of messages that fits a token budget.

    Messages are considered in order. The first message gets special
    treatment: if it alone exceeds ``max_length`` it is skipped (so an
    oversized system prompt does not block the rest), after which messages
    are accumulated until the first one whose inclusion would push the
    tokenized conversation past ``max_length``; scanning stops there so the
    result is a contiguous run of messages.

    Parameters:
    - tokenizer: The tokenizer object that provides the apply_chat_template method.
    - conversations: List of message dicts ({"role": ..., "content": ...}).
    - max_length: Maximum token length allowed for the included messages.
    - chat_template: Optional custom chat template.

    Returns:
    - The rendered template string for the messages that fit, with a
      generation prompt appended, or "" if no message fits.

    Note: the budget is measured WITHOUT the generation prompt, so the
    returned string may tokenize to slightly more than ``max_length``.
    """
    if not conversations:
        return ""

    first_msg = conversations[0]
    remaining_msgs = conversations[1:]

    valid_conv = []

    # Check the first message on its own; skip it (rather than abort) if it
    # alone blows the budget, so later messages still get a chance.
    first_tokens = tokenizer.apply_chat_template([first_msg], chat_template=chat_template)
    if len(first_tokens) <= max_length:
        valid_conv.append(first_msg)

    for message in remaining_msgs:
        candidate = valid_conv + [message]
        # Re-tokenize the whole candidate prefix each time: template special
        # tokens between messages make per-message counts non-additive.
        tokens = tokenizer.apply_chat_template(candidate, chat_template=chat_template)
        if len(tokens) <= max_length:
            valid_conv = candidate
        else:
            # Stop at the first message that does not fit to keep the
            # included messages contiguous.
            break

    # Guard: many chat templates raise on an empty message list.
    if not valid_conv:
        return ""

    return tokenizer.apply_chat_template(
        valid_conv, tokenize=False, add_generation_prompt=True, chat_template=chat_template
    )
|
|
|
|
|
|
|
|