# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __all__ = [ "get_chat_template", "test_chat_templates", "test_hf_gguf_equivalence", "remove_special_tokens", "to_sharegpt", "standardize_sharegpt", "apply_chat_template", "train_on_responses_only", "test_construct_chat_template", ] from transformers import StoppingCriteria, StoppingCriteriaList from torch import LongTensor, FloatTensor from transformers.models.llama.modeling_llama import logger from .save import patch_saving_functions import os import shutil from .tokenizer_utils import * from .models._utils import patch_tokenizer import re from unsloth_zoo.dataset_utils import ( train_on_responses_only, ) CHAT_TEMPLATES = {} DEFAULT_SYSTEM_MESSAGE = {} # =========================================== Unsloth # Unsloth efficient template leverages from Zephyr unsloth_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '>>> User: ' + message['content'] + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '>>> Assistant: ' }}"\ "{% endif %}" pass unsloth_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}>>> User: {{ .Prompt }} {{ end }}>>> Assistant: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """You are a helpful assistant to the user""" ''' unsloth_eos_token = "eos_token" CHAT_TEMPLATES["unsloth"] = (unsloth_template, unsloth_eos_token, False, unsloth_ollama,) DEFAULT_SYSTEM_MESSAGE["unsloth"] = "You are a helpful assistant to the user" pass # =========================================== Zephyr # Zephyr has no BOS! 
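# --------------------------------------------------------------------------
# Illustrative sketch only (not executed on import): each CHAT_TEMPLATES entry
# above is a tuple of (jinja_template, stop_word, yes_map_eos_token,
# ollama_modelfile) and is consumed by get_chat_template() defined further
# below. The model name and the ShareGPT-style mapping here are arbitrary
# examples, not requirements.
def _example_registry_usage():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("unsloth/mistral-7b-bnb-4bit")  # assumed model name
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "chatml",  # any key of CHAT_TEMPLATES
        mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"},
        map_eos_token = True,      # remaps the tokenizer's EOS to <|im_end|>
    )
    messages = [
        {"from" : "human", "value" : "What is 2+2?"},
        {"from" : "gpt",   "value" : "It's 4."},
        {"from" : "human", "value" : "And 3+3?"},
    ]
    return tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)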
zephyr_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '<|user|>\n' + message['content'] + eos_token + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ '<|system|>\n' + message['content'] + eos_token + '\n' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|assistant|>\n' }}"\ "{% endif %}" pass zephyr_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|system|> {{ .System }}{__EOS_TOKEN__} {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }}{__EOS_TOKEN__} {{ end }}<|assistant|> {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' zephyr_eos_token = "eos_token" CHAT_TEMPLATES["zephyr"] = (zephyr_template, zephyr_eos_token, False, zephyr_ollama,) DEFAULT_SYSTEM_MESSAGE["zephyr"] = None # No system message in Zephyr pass # =========================================== ChatML # ChatML has no BOS and not EOS! Rather <|im_start|> and <|im_end|> acts as BOS / EOS. chatml_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}"\ "{% elif message['role'] == 'assistant' %}"\ "{{'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' }}"\ "{% else %}"\ "{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|im_start|>assistant\n' }}"\ "{% endif %}" pass chatml_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant {{ .Response }}<|im_end|> """ PARAMETER stop "<|im_start|>" PARAMETER stop "<|im_end|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' chatml_eos_token = "<|im_end|>" CHAT_TEMPLATES["chatml"] = (chatml_template, chatml_eos_token, True, chatml_ollama,) DEFAULT_SYSTEM_MESSAGE["chatml"] = None # No system message in ChatML pass # =========================================== Mistral-1 # Mistral Instruct doesn't allow system prompts, so we append it to the user message. 
mistral_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' ' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '[INST] ' + message['content'] + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"
pass

# Ollama from https://www.ollama.com/library/mistral
mistral_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST]"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

mistral_eos_token = "eos_token"
CHAT_TEMPLATES["mistral"] = (mistral_template, mistral_eos_token, False, mistral_ollama,)
DEFAULT_SYSTEM_MESSAGE["mistral"] = None # No system message in Mistral
pass

# =========================================== Llama-2
# Adds BOS to every convo! And weird <<SYS>> <</SYS>> system messages.
llama_template = \
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['content'] + '\n<</SYS>>\n\n' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ bos_token + '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ ' ' + message['content'].strip() + ' ' + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"
pass

# Ollama from https://www.ollama.com/library/llama3
llama_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] <<SYS>>{{ .System }}<</SYS>>

{{ .Prompt }} [/INST]"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

llama_eos_token = "eos_token"
CHAT_TEMPLATES["llama"] = (llama_template, llama_eos_token, False, llama_ollama,)
DEFAULT_SYSTEM_MESSAGE["llama"] = None # No system message in Llama
pass

# =========================================== Vicuna
# https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template
vicuna_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{ messages[0]['content'] + ' ' }}"\
        "{% set loop_messages = messages[1:] %}"\
    "{% else %}"\
        "{{ '{system_message}' + ' ' }}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ 'USER: ' + message['content'] + ' ' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ 'ASSISTANT: ' + message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ 'ASSISTANT:' }}"\
    "{% endif %}"
pass

# Ollama from https://www.ollama.com/library/vicuna
vicuna_ollama = \
'''
FROM
{__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}USER: {{ .Prompt }} {{ end }}ASSISTANT: {{ .Response }} {__EOS_TOKEN__}""" PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' vicuna_eos_token = "eos_token" CHAT_TEMPLATES["vicuna"] = (vicuna_template, vicuna_eos_token, False, vicuna_ollama,) DEFAULT_SYSTEM_MESSAGE["vicuna"] = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions." pass # =========================================== Vicuna Old # https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template vicuna_old_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '### Human: ' + message['content'] + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '### Assistant: ' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '### Assistant:' }}"\ "{% endif %}" pass vicuna_old_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Human: {{ .Prompt }} {{ end }}### Assistant: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.""" ''' vicuna_old_eos_token = "eos_token" CHAT_TEMPLATES["vicuna_old"] = (vicuna_old_template, vicuna_old_eos_token, False, vicuna_old_ollama,) DEFAULT_SYSTEM_MESSAGE["vicuna_old"] = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human\\'s questions." CHAT_TEMPLATES["vicuna old"] = CHAT_TEMPLATES["vicuna_old"] DEFAULT_SYSTEM_MESSAGE["vicuna old"] = DEFAULT_SYSTEM_MESSAGE["vicuna_old"] pass # =========================================== Alpaca multi turn # https://github.com/tatsu-lab/stanford_alpaca Changed for multi-turn convos alpaca_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '### Instruction:\n' + message['content'] + '\n\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '### Response:\n' + message['content'] + eos_token + '\n\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '### Response:\n' }}"\ "{% endif %}" pass alpaca_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Instruction: {{ .Prompt }}{{ end }} ### Response: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """Below are some instructions that describe some tasks. 
Write responses that appropriately complete each request."""
'''

alpaca_eos_token = "eos_token"
CHAT_TEMPLATES["alpaca"] = (alpaca_template, alpaca_eos_token, False, alpaca_ollama,)
DEFAULT_SYSTEM_MESSAGE["alpaca"] = "Below are some instructions that describe some tasks. Write responses that appropriately complete each request."
pass

# =========================================== Gemma
# https://huggingface.co/google/gemma-7b-it
# Notice we must use |trim for lstrip and rstrip. <start_of_turn> maps to 106.
# <end_of_turn> maps to 107. user and model are normal 1 word tokens.
gemma_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{'<start_of_turn>user\n' + messages[0]['content'] | trim + ' ' + messages[1]['content'] | trim + '<end_of_turn>\n'}}"\
        "{% set messages = messages[2:] %}"\
    "{% endif %}"\
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{'<start_of_turn>user\n' + message['content'] | trim + '<end_of_turn>\n'}}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{'<start_of_turn>model\n' + message['content'] | trim + '<end_of_turn>\n' }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '<start_of_turn>model\n' }}"\
    "{% endif %}"
pass

# Ollama from https://www.ollama.com/library/gemma
gemma_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """<start_of_turn>user
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<start_of_turn>"
PARAMETER stop "<end_of_turn>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

gemma_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma"] = (gemma_template, gemma_eos_token, True, gemma_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma"] = None # No system message in Gemma
pass

# =========================================== Gemma with ChatML instead
# We find using <eos> is still more appropriate!
gemma_chatml_template = "{{ bos_token }}" + chatml_template
pass

gemma_chatml_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

gemma_chatml_eos_token = (
    {"<start_of_turn>" : "<|im_start|>", "<end_of_turn>" : "<|im_end|>"},
    "<|im_end|>",
)
CHAT_TEMPLATES["gemma_chatml"] = (gemma_chatml_template, gemma_chatml_eos_token, True, gemma_chatml_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma_chatml"] = None # No system message in Gemma
pass

# =========================================== Gemma 2
# Same as Gemma 1, but with sliding window attention!
# https://ollama.com/library/gemma2/blobs/6522ca797f47
gemma2_template = gemma_template
gemma2_ollama = gemma_ollama + "PARAMETER num_ctx 4096\n"
gemma2_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma2"] = (gemma2_template, gemma2_eos_token, True, gemma2_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma2"] = None # No system message in Gemma 2

# =========================================== Gemma 2 with ChatML instead
gemma2_chatml_template = gemma_chatml_template
gemma2_chatml_ollama = gemma_chatml_ollama + "PARAMETER num_ctx 4096\n"
gemma2_chatml_eos_token = gemma_chatml_eos_token
CHAT_TEMPLATES["gemma2_chatml"] = (gemma2_chatml_template, gemma2_chatml_eos_token, True, gemma2_chatml_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma2_chatml"] = None # No system message in Gemma 2
pass

# =========================================== Llama-3
# Weirdly \n\n is needed?
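# Illustrative sketch only: applying the "llama-3" template registered just
# below via get_chat_template(). The model name is an arbitrary example.
def _example_llama3_usage():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct")  # assumed model name
    tokenizer = get_chat_template(tokenizer, chat_template = "llama-3")
    messages = [{"role" : "user", "content" : "Hello!"}]
    # add_generation_prompt appends the assistant header so generation can start
    return tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)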
llama3_template = \ "{{ bos_token }}"\ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% else %}"\ "{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"\ "{% endif %}" pass # Ollama from https://www.ollama.com/library/llama3 llama3_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> {{ .Response }}<|eot_id|>""" PARAMETER stop "<|start_header_id|>" PARAMETER stop "<|end_header_id|>" PARAMETER stop "<|eot_id|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' llama3_template_eos_token = "eos_token" CHAT_TEMPLATES["llama-3"] = (llama3_template, llama3_template_eos_token, False, llama3_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-3"] = None # No system message in Llama-3 CHAT_TEMPLATES["llama3"] = (llama3_template, llama3_template_eos_token, False, llama3_ollama,) DEFAULT_SYSTEM_MESSAGE["llama3"] = None # No system message in Llama-3 pass # =========================================== Phi-3 # "{{ bos_token }}"\ # Phi-3.5 removes BOS? phi3_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{'<|user|>\n' + message['content'] + '<|end|>\n'}}"\ "{% elif message['role'] == 'assistant' %}"\ "{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}"\ "{% else %}"\ "{{'<|' + message['role'] + '|>\n' + message['content'] + '<|end|>\n'}}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|assistant|>\n' }}"\ "{% endif %}" pass # Ollama from https://www.ollama.com/library/phi3 phi3_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|system|> {{ .System }}<|end|> {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }}<|end|> {{ end }}<|assistant|> {{ .Response }}<|end|> """ PARAMETER stop "<|end|>" PARAMETER stop "<|user|>" PARAMETER stop "<|assistant|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' phi3_template_eos_token = "<|end|>" CHAT_TEMPLATES["phi-3"] = (phi3_template, phi3_template_eos_token, False, phi3_ollama,) DEFAULT_SYSTEM_MESSAGE["phi-3"] = None # No system message in Phi-3 CHAT_TEMPLATES["phi-35"] = CHAT_TEMPLATES["phi-3"] DEFAULT_SYSTEM_MESSAGE["phi-35"] = None # No system message in Phi-3.5 CHAT_TEMPLATES["phi-3.5"] = CHAT_TEMPLATES["phi-3"] DEFAULT_SYSTEM_MESSAGE["phi-3.5"] = None # No system message in Phi-3.5 pass # =========================================== Llama-3.1 """ No trimming in Llama 3.1 Instruct! 
Also an extra newline for Cutting Knowledge Date See https://colab.research.google.com/drive/1Xpqq5xpIgO-B00MQ-UccYMwN2J8QFgBM?usp=sharing Also should be import datetime tokenizer.apply_chat_template( messages, add_generation_prompt = True, tokenize = False, date_string = datetime.today().strftime("%d %B %Y")), ) """ llama31_template = \ """{{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} {%- endif %} {%- if not tools_in_user_message is defined %} {%- set tools_in_user_message = true %} {%- endif %} {%- if not date_string is defined %} {%- set date_string = "26 July 2024" %} {%- endif %} {%- if not tools is defined %} {%- set tools = none %} {%- endif %} {#- This block extracts the system message, so we can slot it into the right place. #} {%- if messages[0]['role'] == 'system' %} {%- set system_message = messages[0]['content'] %} {%- set messages = messages[1:] %} {%- else %} {%- set system_message = "{system_message}" %} {%- endif %} {#- System message + builtin tools #} {{- "<|start_header_id|>system<|end_header_id|>\n\n" }} {%- if builtin_tools is defined or tools is not none %} {{- "Environment: ipython\n" }} {%- endif %} {%- if builtin_tools is defined %} {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} {%- endif %} {{- "Cutting Knowledge Date: December 2023\n" }} {{- "Today Date: " + date_string + "\n\n" }} {%- if tools is not none and not tools_in_user_message %} {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} {{- "Do not use variables.\n\n" }} {%- for t in tools %} {{- t | tojson(indent=4) }} {{- "\n\n" }} {%- endfor %} {%- endif %} {{- system_message }} {{- "<|eot_id|>" }} {#- Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message and not tools is none %} {#- Extract the first user message so we can plug it in here #} {%- if messages | length != 0 %} {%- set first_user_message = messages[0]['content'] %} {%- set messages = messages[1:] %} {%- else %} {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} {%- endif %} {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} {{- "Given the following functions, please respond with a JSON for a function call " }} {{- "with its proper arguments that best answers the given prompt.\n\n" }} {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} {{- "Do not use variables.\n\n" }} {%- for t in tools %} {{- t | tojson(indent=4) }} {{- "\n\n" }} {%- endfor %} {{- first_user_message + "<|eot_id|>"}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }} {%- elif 'tool_calls' in message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception("This model only supports single tool-calls at once!") }} {%- endif %} {%- set tool_call = message.tool_calls[0].function %} {%- if builtin_tools is defined and tool_call.name in builtin_tools %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} {{- "<|python_tag|>" + tool_call.name + ".call(" }} {%- for arg_name, arg_val in tool_call.arguments | items %} {{- arg_name + '="' + arg_val + '"' }} {%- if not loop.last %} {{- ", " }} {%- endif %} {%- endfor %} {{- ")" }} {%- else %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} {{- '{"name": "' + tool_call.name + '", ' }} {{- '"parameters": ' }} {{- tool_call.arguments | tojson }} {{- "}" }} {%- endif %} {%- if builtin_tools is defined %} {#- This means we're in ipython mode #} {{- "<|eom_id|>" }} {%- else %} {{- "<|eot_id|>" }} {%- endif %} {%- elif message.role == "tool" or message.role == "ipython" %} {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} {%- if message.content is mapping or message.content is iterable %} {{- message.content | tojson }} {%- else %} {{- message.content }} {%- endif %} {{- "<|eot_id|>" }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %} """ pass # Ollama from https://ollama.com/library/llama3.1 (needs updating!) llama31_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .Messages }} {{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|> {{- if .System }} {{ .System }} {{- end }} {{- if .Tools }} You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original use question. {{- end }} {{- end }}<|eot_id|> {{- range $i, $_ := .Messages }} {{- $last := eq (len (slice $.Messages $i)) 1 }} {{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|> {{- if and $.Tools $last }} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables. 
{{ $.Tools }} {{- end }} {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|> {{ end }} {{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|> {{- if .ToolCalls }} {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }} {{- else }} {{ .Content }}{{ if not $last }}<|eot_id|>{{ end }} {{- end }} {{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|> {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|> {{ end }} {{- end }} {{- end }} {{- else }} {{- if .System }}<|start_header_id|>system<|end_header_id|> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> {{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}""" PARAMETER stop "<|start_header_id|>" PARAMETER stop "<|end_header_id|>" PARAMETER stop "<|eot_id|>" PARAMETER stop "<|eom_id|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' llama31_template_eos_token = "eos_token" CHAT_TEMPLATES["llama-3.1"] = (llama31_template, llama31_template_eos_token, False, llama31_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-3.1"] = "" # Llama3.1 default system message is empty + the dates CHAT_TEMPLATES["llama-31"] = (llama31_template, llama31_template_eos_token, False, llama31_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-31"] = "" # Llama3.1 default system message is empty + the dates pass # =========================================== Qwen 2.5 qwen25_template = \ """{%- if tools %} {{- \'<|im_start|>system\\n\' }} {%- if messages[0][\'role\'] == \'system\' %} {{- messages[0][\'content\'] }} {%- else %} {{- \'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\' }} {%- endif %} {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n" }} {%- for tool in tools %} {{- "\\n" }} {{- tool | tojson }} {%- endfor %} {{- "\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\"name\\": , \\"arguments\\": }\\n<|im_end|>\\n" }}\n{%- else %} {%- if messages[0][\'role\'] == \'system\' %} {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }} {%- else %} {{- \'<|im_start|>system\\n{system_message}<|im_end|>\\n\' }} {%- endif %}\n{%- endif %}\n{%- for message in messages %} {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\' + \'\\n\' }} {%- elif message.role == "assistant" %} {{- \'<|im_start|>\' + message.role }} {%- if message.content %} {{- \'\\n\' + message.content }} {%- endif %} {%- for tool_call in message.tool_calls %} {%- if tool_call.function is defined %} {%- set tool_call = tool_call.function %} {%- endif %} {{- \'\\n\\n{"name": "\' }} {{- tool_call.name }} {{- \'", "arguments": \' }} {{- tool_call.arguments | tojson }} {{- \'}\\n\' }} {%- endfor %} {{- \'<|im_end|>\\n\' }} {%- elif message.role == "tool" %} {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} {{- \'<|im_start|>user\' }} {%- endif %} {{- \'\\n\\n\' }} {{- message.content }} {{- \'\\n\' }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} {{- \'<|im_end|>\\n\' }} {%- endif %} {%- endif %}\n{%- endfor %}\n{%- if 
add_generation_prompt %} {{- \'<|im_start|>assistant\\n\' }} {%- endif %} """ # Ollama from https://ollama.com/library/qwen2.5/blobs/eb4402837c78 qwen25_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{- if .Messages }} {{- if or .System .Tools }}<|im_start|>system {{- if .System }} {{ .System }} {{- end }} {{- if .Tools }} # Tools You may call one or more functions to assist with the user query. You are provided with function signatures within XML tags: {{- range .Tools }} {"type": "function", "function": {{ .Function }}} {{- end }} For each function call, return a json object with function name and arguments within XML tags: {"name": , "arguments": } {{- end }}<|im_end|> {{ end }} {{- range $i, $_ := .Messages }} {{- $last := eq (len (slice $.Messages $i)) 1 -}} {{- if eq .Role "user" }}<|im_start|>user {{ .Content }}<|im_end|> {{ else if eq .Role "assistant" }}<|im_start|>assistant {{ if .Content }}{{ .Content }} {{- else if .ToolCalls }} {{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} {{ end }} {{- end }}{{ if not $last }}<|im_end|> {{ end }} {{- else if eq .Role "tool" }}<|im_start|>user {{ .Content }} <|im_end|> {{ end }} {{- if and (ne .Role "assistant") $last }}<|im_start|>assistant {{ end }} {{- end }} {{- else }} {{- if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant {{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}""" PARAMETER stop "<|im_end|>" PARAMETER stop "<|endoftext|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' qwen25_template_eos_token = "eos_token" qwen25_default_system_message = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." CHAT_TEMPLATES["qwen-2.5"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen-2.5"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen-25"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen-25"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen25"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen25"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen2.5"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen2.5"] = qwen25_default_system_message # No system message in Qwen 2.5 pass def _change_system_message(template: str, type_chat_template: str, system_message: str = None): system_message_pattern = r"\{system_message\}" # For predefined templates, check if default system message exists default_system_message = DEFAULT_SYSTEM_MESSAGE.get(f"{type_chat_template}", None) if default_system_message is None: if system_message is not None: logger.warning_once( f"Unsloth: You tried to change the system message for {type_chat_template}, " "but it doesn't have a default system message. " "You need to manually add the system message in your data." 
) return template, system_message pass # For custom templates if type_chat_template is None: has_placeholder = re.search(system_message_pattern, template) is not None if has_placeholder: if system_message is None: raise ValueError("Unsloth: You need to provide a system message for custom templates.") new_template = re.sub(system_message_pattern, system_message, template) return new_template, system_message return template, system_message pass # For predefined templates with default system message message_to_use = system_message if system_message is not None else default_system_message new_template = re.sub(system_message_pattern, message_to_use, template) return new_template, message_to_use pass def get_chat_template( tokenizer, chat_template = "chatml", mapping = {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"}, map_eos_token = True, system_message = None, ): assert(type(map_eos_token) is bool) old_tokenizer = tokenizer IS_GEMMA = False if tokenizer.__class__.__name__.startswith("Gemma"): if chat_template == "chatml": chat_template = "gemma_chatml" IS_GEMMA = True pass # We add a check for Llama-3 # if chat_template == "llama-3": # tokenizer._using_llama3_template = True # else: # llama3_tokens = set(["<|end_header_id|>", "<|eot_id|>", "<|start_header_id|>"]) # check_llama3_tokens = llama3_tokens & set(str(x) for x in tokenizer.added_tokens_decoder.values()) # if len(check_llama3_tokens) == len(llama3_tokens): # tokenizer._using_llama3_template = True # pass # pass # We first check if the tokenizer is a fast one. If not, we cannot convert this! is_fast_tokenizer = getattr(tokenizer, "is_fast", False) old_padding_side = tokenizer.padding_side same_padding_token = False type_chat_template = None if type(chat_template) in (list, tuple,): # For changing system message later # Since it's not supported yet, we will raise an error first! type_chat_template = chat_template[0].lower() chat_template, stop_word = chat_template assert(type(chat_template) is str) assert(type(stop_word) is str) ollama_modelfile = None elif type(chat_template) is str: # For changing system message later type_chat_template = chat_template.lower() chat_template, stop_word, yes_map_eos_token, ollama_modelfile = CHAT_TEMPLATES[chat_template] # Check mapping to eos_token if not map_eos_token and yes_map_eos_token: map_eos_token = True if not yes_map_eos_token and map_eos_token: map_eos_token = False if type(stop_word) in (list, tuple,): token_mapping, stop_word = stop_word assert(type(token_mapping) is dict) else: token_mapping = None assert(type(stop_word) is str) # Check fast tokenizer if not is_fast_tokenizer: print( "Unsloth: Not a fast tokenizer, so can't process it as of yet :(\n"\ "Please log a Github issue if you want this as a new feature!\n"\ "Your chat template will still work, but it won't add or edit tokens." ) elif token_mapping is not None: # token_mapping = {"" : "<|im_start|>", "" : "<|im_end|>"} # For Gemma :) string_vocab = tokenizer._tokenizer.to_str() skipped = 0 for old_token, new_token in token_mapping.items(): old_count = string_vocab.count(f'"{old_token}"') new_count = string_vocab.count(f'"{new_token}"') if new_count != 0: print(f"{new_token} is already a token. 
Skipping.") skipped += 1 elif old_count == 0: raise RuntimeError(f"{old_token} was not part of the tokenizer!") else: string_vocab = string_vocab.replace(f'"{old_token}"', f'"{new_token}"') pass pass if map_eos_token and (not stop_word in token_mapping.values()): # Do not map 107 = <|im_end|> and 1 = <|im_end|>. This will reduce the vocab size by 1 logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.") string_vocab = string_vocab.replace(tokenizer.eos_token, stop_word) pass if skipped != len(token_mapping): new_tokenizer = tokenizer._tokenizer.from_str(string_vocab) # Careful on pad_token old_pad_token = tokenizer.pad_token if old_pad_token == tokenizer.eos_token: old_pad_token = stop_word same_padding_token = True pass if map_eos_token: new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, eos_token = stop_word, pad_token = old_pad_token, ) else: new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, pad_token = old_pad_token, ) pass # Must fix the sentence piece tokenizer since there's no tokenizer.model file! tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,) else: pass elif map_eos_token and (stop_word != "eos_token"): logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.") # Replaces the old EOS token with a new one. # Useful for ChatML <|im_end|> for example. # Usually we train 2 more tokens <|im_start|> and <|im_end|> # But training the lm_head and embeddings are slow! # This is a HACK! # Idea from https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser old_bos_token = getattr(tokenizer, "bos_token", None) old_eos_token = getattr(tokenizer, "eos_token", None) old_pad_token = getattr(tokenizer, "pad_token", None) old_unk_token = getattr(tokenizer, "unk_token", None) string_vocab = tokenizer._tokenizer.to_str() # First check if new stop_word is in the tokenizer if stop_word in string_vocab: # We shall swap them around temporary_stop_token = "<|:__TEMP//STOP//TOKEN__:|>" string_vocab = string_vocab.replace(old_eos_token, temporary_stop_token) string_vocab = string_vocab.replace(stop_word, old_eos_token) string_vocab = string_vocab.replace(temporary_stop_token, stop_word) else: string_vocab = string_vocab.replace(old_eos_token, stop_word) pass new_tokenizer = tokenizer._tokenizer.from_str(string_vocab) # Careful on pad_token if old_pad_token == old_eos_token: old_pad_token = stop_word same_padding_token = True pass new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, bos_token = old_bos_token, eos_token = stop_word, unk_token = old_unk_token, pad_token = old_pad_token, ) # Must fix the sentence piece tokenizer since there's no tokenizer.model file! 
token_mapping = { old_eos_token : stop_word, } tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,) pass else: raise TypeError( f"Unsloth: `chat_template` must be a tuple of (your_template, eos_token,) or one of\n"\ f"{CHAT_TEMPLATES.keys()}" ) pass # Careful on Gemma # bos_token is a must or else losses become too high if IS_GEMMA and not chat_template.startswith(("{{ bos_token }}", "{{- bos_token }}")): chat_template = "{{ bos_token }}" + chat_template pass # For ShareGPT role -> from and content -> value new_chat_template = chat_template\ .replace("'role'", "'" + mapping["role"] + "'")\ .replace("'content'", "'" + mapping["content"] + "'")\ .replace("'user'", "'" + mapping["user"] + "'")\ .replace("'assistant'", "'" + mapping["assistant"] + "'") _, tokenizer = patch_tokenizer(model = None, tokenizer = tokenizer) tokenizer.padding_side = old_padding_side # If not normal HF, we add a check to make old templates work if mapping != {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"}: chat_template = \ "{% if 'role' in messages[0] %}" + \ chat_template + \ "{% else %}" + \ new_chat_template + \ "{% endif %}" else: chat_template = new_chat_template pass chat_template, system_message = _change_system_message(chat_template, type_chat_template, system_message) tokenizer.chat_template = chat_template # Also fix up other tokens old_pad_token = getattr(old_tokenizer, "pad_token", None) old_bos_token = getattr(old_tokenizer, "bos_token", None) old_unk_token = getattr(old_tokenizer, "unk_token", None) new_pad_token = getattr(tokenizer, "pad_token", None) new_bos_token = getattr(tokenizer, "bos_token", None) new_unk_token = getattr(tokenizer, "unk_token", None) if old_bos_token != new_bos_token: tokenizer.bos_token = old_bos_token if old_unk_token != new_unk_token: tokenizer.unk_token = old_unk_token if not same_padding_token: if old_pad_token != new_pad_token: tokenizer.pad_token = old_pad_token pass # stopping_criteria = create_stopping_criteria(tokenizer, stop_word) # Patch saving functions tokenizer = patch_saving_functions(tokenizer) # Add Ollama tokenizer._ollama_modelfile = ollama_modelfile tokenizer._system_message = system_message return tokenizer#, stopping_criteria pass def remove_special_tokens(tokenizer, prompt): # Removes double BOS token if prompt.startswith(tokenizer.bos_token): prompt = prompt[len(tokenizer.bos_token):] pass return prompt pass def _parse_combined_prompt(combined_prompt, dataset): # Find {...} possible_columns = re.findall(r"\{(.+?)\}", combined_prompt) dataset_columns = set(dataset.column_names) for column in possible_columns: if column not in dataset_columns: raise KeyError( f"Unsloth: Your prompt includes '{column}' but this does not exist in the dataset. 
"\ f"Only allowed columns are {list(dataset_columns)}" ) pass pass # Find [[...]] optional_prompts = list(re.finditer(r"\[\[.+?\]\]", combined_prompt, flags = re.DOTALL | re.MULTILINE)) optional_prompts = [(x.span(), x.group(0)) for x in optional_prompts] final_optional_prompts = [] if len(optional_prompts) != 0: # Add left left = optional_prompts[0] l = left[0][0] if l != 0: final_optional_prompts.append(combined_prompt[:l]) # Add in between for left, right in zip(optional_prompts[:-1], optional_prompts[1:]): l, r = left[0][-1], right[0][0] final_optional_prompts.append(left) if l != r: final_optional_prompts.append(combined_prompt[l : r]) pass final_optional_prompts.append(optional_prompts[-1]) # Add right right = optional_prompts[-1] r = right[0][1] if r != len(combined_prompt): final_optional_prompts.append(combined_prompt[r:]) else: # Just add in the entire string final_optional_prompts.append(combined_prompt) pass check_combined = "".join(x if type(x) is str else x[1] for x in final_optional_prompts) assert(combined_prompt == check_combined) return possible_columns, final_optional_prompts pass def _create_formatter(possible_columns, final_optional_prompts, user_column_name): # Start final prompt! function = ["def __combined_prompt_processor__(examples):"] columns = list(set(possible_columns)) for column in columns: function.append(f"{' '*4}{column}__ = examples['{column}']") function.append(f"{' '*4}texts = []") function.append(f"{' '*4}for ({', '.join(columns)}) in zip({', '.join(f'{x}__' for x in columns)}):") # Add optional tags as well! final_prompt = "" formatter = [] for j, optional_prompt in enumerate(final_optional_prompts): if type(optional_prompt) is str: columns = re.findall(r"\{(.+?)\}", optional_prompt) formatter += columns # Must escape \n \r final_prompt += optional_prompt.encode("unicode-escape").decode("utf-8").replace("'", "\\'").replace('"', '\\"') else: where, prompt = optional_prompt # Strip [[...]] # Must escape \n \r prompt = prompt[2:-2].encode("unicode-escape").decode("utf-8").replace("'", "\\'").replace('"', '\\"') columns = re.findall(r"\{(.+?)\}", prompt) x = f"__optional_{j}__" prompt = f"{' '*8}{x} = '{prompt}'.format({', '.join(f'{x} = {x}' for x in columns)}) if {columns[0]} else ''" function.append(prompt) formatter.append(x) final_prompt += "{" + x + "}" pass pass function.insert(1, f"{' '*4}__combined_prompt__ = '{final_prompt}'") function.append(f"{' '*8}texts.append("\ f"__combined_prompt__.format({', '.join(f'{x} = {x}' for x in formatter)}))") function.append(f"{' '*4}return " + "{ " + f"'{user_column_name}' : texts" + " }") return "\n".join(function) pass def to_sharegpt( dataset, merged_prompt = "", merged_column_name = "instruction", output_column_name = "output", remove_unused_columns = True, conversation_extension = 1, random_state = 3407, ): """ Converts a dataset to ShareGPT style. ShareGPT requires only 1 input and 1 output field. This means one has to merge multiple columns into 1 for 1 input field. Use `conversation_extension` to increase the length of each conversation by randomnly selecting a few and packing them into 1. 
merged_prompt = "", Prompt to merge columns into 1 input merged_column_name = "instruction", Final column name for the input field output_column_name = "output", Final column name for the output field remove_unused_columns = True, conversation_extension = 1, Automatically combines `conversation_extension` convos into 1 random_state = 3407, """ if "conversations" in dataset.column_names: convo = dataset[0]["conversations"] if type(convo) is list: raise TypeError("Unsloth: Your dataset is probably already in ShareGPT format!") pass pass possible_columns, final_optional_prompts = _parse_combined_prompt(merged_prompt, dataset) function = _create_formatter(possible_columns, final_optional_prompts, merged_column_name) exec(function, globals()) dataset = dataset.map(__combined_prompt_processor__, batched = True, desc = "Merging columns") def __convert_to_sharegpt__(examples): users = examples[merged_column_name] assistants = examples[output_column_name] texts = [ [ {"from" : "human", "value" : str(user) }, {"from" : "gpt", "value" : str(assistant)}, ] \ for user, assistant in zip(users, assistants) ] return { "conversations" : texts, } pass dataset = dataset.map( __convert_to_sharegpt__, batched = True, desc = "Converting to ShareGPT", # Remove unused columns! remove_columns = dataset.column_names if remove_unused_columns else None, ) # Randomnly concat conversations to create a long stream! from datasets import concatenate_datasets n_extensions = max(conversation_extension-1, 0) if n_extensions == 0: return dataset dataset = dataset.rename_columns({"conversations" : "conversations0"}) all_shuffled = [dataset] for j in range(1, n_extensions+1): shuffled = dataset.shuffle(seed = random_state+j).rename_columns({"conversations0" : f"conversations{j}"}) all_shuffled.append(shuffled) pass dataset = concatenate_datasets(all_shuffled, axis = 1) # Combine them into 1 function = "def __combine_conversations__(examples):\n" n_extensions += 1 for j in range(n_extensions): function += f"{' '*4}conversations{j}__ = examples['conversations{j}']\n" function += f"{' '*4}convos = []\n" function += f"{' '*4}for ({', '.join(f'conversations{j}' for j in range(n_extensions))}) "\ f"in zip({', '.join(f'conversations{j}__' for j in range(n_extensions))}):\n" function += f"{' '*8}convos.append("\ f"{'+'.join(f'conversations{j}' for j in range(n_extensions))})\n" function += f"{' '*4}return " + "{ " + "'conversations' : convos" + " }" # Map function exec(function, globals()) dataset = dataset.map( __combine_conversations__, batched = True, desc = "Extending conversations", # Remove unused columns! remove_columns = dataset.column_names if remove_unused_columns else None, ) return dataset pass def standardize_sharegpt( dataset, aliases_for_system = ["system",], aliases_for_user = ["user", "human", "input",], aliases_for_assistant = ["gpt", "assistant", "output",], ): """ Standardizes ShareGPT and other formats to user/assistant Hugging Face format. Get aliases for the system, user and assistant roles. These shall map to "system", "user" and "assistant" respectively. 
aliases_for_system = ["system",], aliases_for_user = ["user", "human", "input",], aliases_for_assistant = ["gpt", "assistant", "output",], """ import collections import itertools convos = dataset[:10]["conversations"] uniques = collections.defaultdict(list) for convo in convos: for message in convo: for key, value in message.items(): uniques[key].append(value) pass # Must be only 2 entries assert(len(uniques.keys()) == 2) keys = list(uniques.keys()) length_first = len(set(uniques[keys[0]])) length_second = len(set(uniques[keys[1]])) if length_first < length_second: # Role is assigned to the first element role_key = keys[0] content_key = keys[1] else: role_key = keys[1] content_key = keys[0] pass # Check roles are in aliases all_aliases = set(aliases_for_system + aliases_for_user + aliases_for_assistant) roles = set(uniques[role_key]) leftover_aliases = (all_aliases | roles) - all_aliases if len(leftover_aliases) != 0: raise TypeError( f"Unsloth: {list(leftover_aliases)} are not in aliases. Please update aliases." ) pass # Mapping for aliases aliases_mapping = {} for x in aliases_for_system: aliases_mapping[x] = "system" for x in aliases_for_user: aliases_mapping[x] = "user" for x in aliases_for_assistant: aliases_mapping[x] = "assistant" def _standardize_dataset(examples): convos = examples["conversations"] all_convos = [] for convo in convos: new_convo = [ { "role" : aliases_mapping[message[role_key]], "content" : message[content_key], } for message in convo ] all_convos.append(new_convo) pass return { "conversations" : all_convos, } pass return dataset.map(_standardize_dataset, batched = True, desc = "Standardizing format") pass def get_ollama_eos_tokens(tokenizer, extra_eos_tokens = []): added_tokens_decoder = tokenizer.added_tokens_decoder.values() added_tokens_decoder = [str(x) for x in added_tokens_decoder] # Remove added_tokens_decoder duplicates added_tokens_decoder = list(set(added_tokens_decoder) - set(extra_eos_tokens)) # Remove BOS if getattr(tokenizer, "bos_token", None) is not None: added_tokens_decoder = [x for x in added_tokens_decoder if x != tokenizer.bos_token] pass repeatted_tokens = [] # Join all vocab joined_text = "\x01\x00".join(added_tokens_decoder) for token in added_tokens_decoder: n = len(token) repeatted_counts = joined_text.count(token[:n//2]) # Try finding longer than 1/2 of the token in the rest # For eg <|reserved_special_token_0|>, <|reserved_special_token_1|> if repeatted_counts > 2: for j in range(n//2+1, n): if joined_text.count(token[:j]) < repeatted_counts: j -= 1 # Remove repeatted tokens to reduce search space joined_text = joined_text.replace(token[:j], "") repeatted_tokens.append(token[:j]) break pass pass pass # Remove duplicates splitted = joined_text.split("\x01\x00") final_eos_tokens = [] for old, new in zip(added_tokens_decoder, splitted): if old == new: final_eos_tokens.append(old) pass final_eos_tokens += extra_eos_tokens final_eos_tokens += repeatted_tokens # Remove new lines, spaces and HTML tags filtered_eos_tokens = [] for token in final_eos_tokens: if token.count("\n") == len(token): continue elif token.count("▁") == len(token): continue elif token.startswith("<") and len(token) <= 2: continue elif token.startswith("<|start_header_id|>system<|end_header_id|> {SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|>""", 
default_system_message = \ "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.", extra_eos_tokens = None, ): """ Creates a Ollama modelfile and a HF Jinja template from a custom template. You must provide 2x examples of an input & output. There is an optional system message as well. You must use {INPUT}, {OUTPUT} twice, and {SYSTEM} is optional. """ # Strip only the left chat_template = chat_template.lstrip() assert(tokenizer is not None) if extra_eos_tokens is None: extra_eos_tokens = [] elif type(extra_eos_tokens) is str: extra_eos_tokens = [extra_eos_tokens,] vocab = tokenizer.get_vocab() for extra_eos in extra_eos_tokens: assert(type(extra_eos) is str) if extra_eos not in vocab: raise ValueError(f"Unsloth: `{extra_eos}` is not a singular token in the tokenizer.") pass pass error_msg = \ "Unsloth: Your prompt template must have 2 examples showing the user input {INPUT} "\ "and the assistant output {OUTPUT}\n\n"\ "For example what is not allowed is just:\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n\n\n"\ "What is required is 2x of this:\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n" # Check for EOS after {OUTPUT} if tokenizer.eos_token is not None: extra_eos_tokens.insert(0, tokenizer.eos_token) if len(extra_eos_tokens) == 0: raise RuntimeError( "Unsloth: Your tokenizer does not have an EOS token? Please provide one via extra_eos_tokens!" ) pass # Check tokenizer types tokenizer_name = tokenizer.name_or_path.lower() if tokenizer_name.startswith(("unsloth/llama-3-8b-instruct", "unsloth/llama-3-70b-instruct")): # Add <|eot_id|> extra_eos_tokens.append("<|eot_id|>") elif ("<|eot_id|>" in extra_eos_tokens or "<|eot_id|>" in chat_template) and \ tokenizer_name.startswith(("unsloth/llama-3-8b", "unsloth/llama-3-70b")): # Warn logger.warning( "Unsloth: Base llama-3 models did not train <|eot_id|>.\n"\ "Please use the instruct version or use <|end_of_text|>" ) pass extra_eos_tokens = list(set(extra_eos_tokens)) count_eos = 0 for eos in extra_eos_tokens: count_eos += len(re.findall(r"{OUTPUT}" + re.escape(eos), chat_template)) pass # This forces you to provide 2 input and outputs final_combined_check = False try: # O(N^2) search finding 2 repeatted pieces of text j = len(chat_template)-1 at_least_one = False while j > 0: found = chat_template.rfind(chat_template[j:], 0, j) if found == -1: break j -= 1 at_least_one = True pass if j > 0: j += 1 else: raise RuntimeError(error_msg) if not at_least_one: raise RuntimeError(error_msg) # Must be equivalent to left final_combined_check = True # Repeatted text instruction_response = chat_template[j:] if instruction_response.count("{INPUT}") != 1 or instruction_response.count("{OUTPUT}") != 1: raise RuntimeError(error_msg) pass # 1st System, Instruction, Output pair left = chat_template[:j] # 2nd Instruction, Output pair right = chat_template[j:] final_combined_check = left if final_combined_check else chat_template # Isolate input extra_eos_tokens_regex = "|".join(f"(?:{re.escape(x)})" for x in extra_eos_tokens) if len(extra_eos_tokens_regex) != 0: find_end = f"(?:{extra_eos_tokens_regex})?" 
else: find_end = "" find_end = r"\{INPUT\}[\s\n]{0,}" + find_end input_end = list(re.finditer(find_end, right)) assert(len(input_end) == 1) input_end = input_end[0] input_end = input_end.span(0)[1] input_part = right[:input_end] # Isolate output output_part = right[input_end:] # Isolate system where_system = left.find(input_part) system_part = left[:where_system if where_system != -1 else len(left)] # Check if the user provided a correct prompt combined = system_part + input_part + output_part if combined != final_combined_check: combined_changed = combined .replace('\n', '\\n') left_changed = final_combined_check.replace('\n', '\\n') raise RuntimeError( "Unsloth: The prompt template you provided isn't correct. You gave:\n"\ f"{combined_changed}\n\n"\ "But we require the following:\n"\ f"{left_changed}" ) pass except: ending = chat_template[chat_template.find("{OUTPUT}") + len("{OUTPUT}"):] ending = re.escape(ending) find_text = "{INPUT}" + ending + "(.+?{OUTPUT}" + ending + ")" response_part = re.findall(find_text, chat_template, flags = re.DOTALL | re.MULTILINE) response_part = response_part[0] for j in range(1, len(response_part)): try_find = re.escape(response_part[:j]) try: found = next(re.finditer("(" + try_find + ").+?\{INPUT\}", chat_template, flags = re.DOTALL | re.MULTILINE)) except: break pass separator = found.group(1) response_start = chat_template.find(response_part) start_instruction = chat_template[:response_start].rfind(separator) if start_instruction == -1: start_instruction = 0 instruction_part = chat_template[start_instruction:response_start] combined = instruction_part + response_part where = chat_template.find(combined) system_part = chat_template[:where] system_part, input_part, output_part = system_part, instruction_part, response_part pass if count_eos == 0: logger.warning("Unsloth: We automatically added an EOS token to stop endless generations.") eos = extra_eos_tokens[0] output_part = output_part + eos pass # Ollama modelfile parts # Check bos_token is in system prompt ollama_system = system_part has_bos_token = False always_bos_token = False if tokenizer("A").input_ids[0] == getattr(tokenizer, "bos_token_id", None): always_bos_token = True if ollama_system.startswith(tokenizer.bos_token): has_bos_token = True ollama_system = ollama_system[len(tokenizer.bos_token):] pass pass # Check system if "{SYSTEM}" in ollama_system: system_modelfile = "{{ if .System }}" + ollama_system.replace("{SYSTEM}", "{{ .System }}") + "{{ end }}" else: system_modelfile = ollama_system pass input_modelfile = "{{ if .Prompt }}" + input_part .replace("{INPUT}", "{{ .Prompt }}") + "{{ end }}" output_modelfile = output_part.replace("{OUTPUT}", "{{ .Response }}") # Ollama EOS ollama_eos = get_ollama_eos_tokens(tokenizer, extra_eos_tokens) ollama_eos = '\n'.join(f'PARAMETER stop "{eos}"' for eos in ollama_eos) # Add temperature and min_p to counteract gibberish ollama_eos += "\nPARAMETER temperature 1.5\nPARAMETER min_p 0.1" # Ollama modelfile part = '"""' modelfile = 'FROM {__FILE_LOCATION__}\n\n'\ 'TEMPLATE ' + part + system_modelfile + input_modelfile + output_modelfile + \ part + '\n\n' + ollama_eos # HF Jinja Chat template def process(part, which, content = "message['content']"): if part.endswith(which): part = "'" + part[:part.find(which)] + f"' + {content}" elif part.startswith(which): part = f"{content} + '" + part[part.find(which):] + "'" else: part = "'" + part.replace(which, f"' + {content} + '") + "'" if part.startswith("'' + "): part = part[5:] return part pass input_jinja = 
process(input_part, "{INPUT}") output_jinja = process(output_part, "{OUTPUT}") pass jinja_template = \ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ " + input_jinja + " }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ " + output_jinja + " }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '" + output_part[:output_part.find("{OUTPUT}")] + "' }}"\ "{% endif %}" pass # Now add system prompt to jinja if len(system_part) != 0: partial_system = process(system_part, "{SYSTEM}", "messages[0]['content']") partial_system = partial_system.replace("{SYSTEM}", "") if "{SYSTEM}" in partial_system: if default_system_message is None: raise RuntimeError("Unsloth: Please specify a default system message!") pass # Separate the BOS if has_bos_token: partial_system = partial_system.replace(tokenizer.bos_token, "", 1) system_part = system_part .replace(tokenizer.bos_token, "", 1) pass partial_system = \ "{% if messages[0]['role'] == 'system' %}"\ "{{ " + partial_system + " }}"\ "{% set loop_messages = messages[1:] %}" if default_system_message is not None: full_system = system_part.replace("{SYSTEM}", default_system_message) if "{SYSTEM}" in system_part: modelfile += '\nSYSTEM "' + default_system_message + '"' pass partial_system += "{% else %}"\ "{{ '" + full_system + "' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}" else: partial_system += "{% endif %}" pass jinja_template = partial_system + jinja_template if has_bos_token: jinja_template = "{{ bos_token }}" + jinja_template pass # Fix missing loop_messages if "{% set loop_messages = messages %}" not in jinja_template: jinja_template = jinja_template.replace( "{% for message in loop_messages %}", "{% for message in messages %}", 1, # Only replace the first one ) pass # Check if system part is the same! jinja_template = re.sub( r"\{\% if messages\[0\]\['role'\] \=\= 'system' \%\}\{\{ '(.+?)' \}\}"\ r"\{\% set loop\_messages \= messages\[1\:\] \%\}"\ r"\{\% else \%\}\{\{ '\1' \}\}\{\% set loop\_messages \= messages \%\}\{\% endif \%\}"\ r"\{\% for message in loop\_messages \%\}", r"{{ '\1' }}{% for message in messages %}", jinja_template, flags = re.MULTILINE | re.DOTALL, ) # Check jinja tempate for bos if always_bos_token: if not jinja_template.startswith(("{{ bos_token }}", "{{- bos_token }}")): jinja_template = "{{ bos_token }}" + jinja_template pass # Get instruction and output parts for train_on_inputs = False input_part = input_part [:input_part .find("{INPUT}")] output_part = output_part[:output_part.find("{OUTPUT}")] return modelfile, jinja_template, input_part, output_part pass def test_construct_chat_template(): token = "hf_" from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token = token) chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> {SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|>""" default_system_message = \ "Below are some instructions that describe some tasks. Write responses that appropriately complete each request." 
def test_construct_chat_template():
    token = "hf_"
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token = token)

    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>"""

    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request."

    extra_eos_tokens = None

    modelfile, jinja_template, _, _ = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        default_system_message = default_system_message,
        extra_eos_tokens = extra_eos_tokens,
    )

    messages = [
        {"role": "system", "content": "You are an assistant"},
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": "Ok!"},
        {"role": "assistant", "content": "Anything else?"},
        {"role": "user", "content": "What's 2x2?"},
    ]

    correct_output = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)

    tokenizer.chat_template = jinja_template
    new_output = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
    assert(correct_output == new_output)
    pass
pass


def apply_chat_template(
    dataset,
    tokenizer = None,
    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>""",
    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.",
    extra_eos_tokens = None,
):
    """
    Creates an Ollama modelfile and a HF Jinja template from a custom template.
    You must provide 2x examples of an input & output.
    There is an optional system message as well.

    You must use {INPUT}, {OUTPUT} twice, and {SYSTEM} is optional.
    """
    modelfile, jinja_template, input_part, output_part = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        default_system_message = default_system_message,
        extra_eos_tokens = extra_eos_tokens,
    )

    def formatting_prompts_func(examples):
        convos = examples["conversations"]
        texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
        return { "text" : texts, }
    pass

    tokenizer.chat_template = jinja_template
    tokenizer._ollama_modelfile = modelfile
    tokenizer._unsloth_input_part  = input_part
    tokenizer._unsloth_output_part = output_part

    return dataset.map(formatting_prompts_func, batched = True,)
pass


def create_stopping_criteria(tokenizer, stop_word = "eos_token"):
    class StoppingCriteriaSub(StoppingCriteria):
        __slots__ = "stop_token", "single_match", "length",

        def __init__(self, stops = "eos_token", device = "cuda", encounters = 1):
            super().__init__()
            if stops == "eos_token":
                self.stop_token = torch.tensor(tokenizer.eos_token_id, device = "cuda")
                self.length = 1
            else:
                self.stop_token = tokenizer(["\n" + stops], add_special_tokens = False, return_tensors = "pt")
                self.stop_token = self.stop_token.input_ids.ravel()[1:].to("cuda")
                self.length = self.stop_token.shape[0]
            pass
            self.single_match = self.length == 1
        pass

        def __call__(self, input_ids: LongTensor, scores: FloatTensor) -> bool:
            input_ids = input_ids.ravel()
            last_token = input_ids[-1]
            if self.single_match and (last_token == self.stop_token): return True

            if input_ids.shape[0] >= self.length and \
                (input_ids[-self.length:] == self.stop_token).all(): return True
            return False
        pass
    pass
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops = stop_word)])
    return stopping_criteria
pass
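# Example usage sketch for create_stopping_criteria (commented out; `model` and the prompt
# below are assumptions for illustration, not part of this module):
#
#     stopping_criteria = create_stopping_criteria(tokenizer, stop_word = "eos_token")
#     output_ids = model.generate(
#         **tokenizer("Hello!", return_tensors = "pt").to("cuda"),
#         max_new_tokens = 64,
#         stopping_criteria = stopping_criteria,
#     )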
equal to 5. "}, {"role": "assistant", "content": "No I'm sure its 4."}, {"role": "user", "content": " No it's 100% 5! "}, ] # Zephyr from transformers import AutoTokenizer template = zephyr_template correct_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Chatml template = chatml_template correct_tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Mistral template = mistral_template correct_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") correct_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Llama template = llama_template correct_tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-2-7b-chat") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Vicuna try: from fastchat.conversation import get_conv_template except: os.system("pip -qqq install git+https://github.com/lm-sys/FastChat.git") from fastchat.conversation import get_conv_template correct_prompt = get_conv_template("vicuna_v1.1") for j in range(len(messages)-1): correct_prompt.append_message(correct_prompt.roles[j%2==1], messages[j+1]["content"]) correct_prompt.append_message(correct_prompt.roles[1], "") correct_prompt = tokenizer.bos_token + correct_prompt.get_prompt() template = vicuna_template correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5") correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) try: from fastchat.conversation import get_conv_template except: os.system("pip -qqq install git+https://github.com/lm-sys/FastChat.git") from fastchat.conversation import get_conv_template correct_prompt = get_conv_template("zero_shot") for j in range(len(messages)-1): correct_prompt.append_message(correct_prompt.roles[j%2==1], messages[j+1]["content"]) correct_prompt.append_message(correct_prompt.roles[1], "") correct_prompt = tokenizer.bos_token + correct_prompt.get_prompt() template = vicuna_old_template correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5") correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) # We add ourselves assert(correct_prompt == our_prompt.replace("", "")) # Gemma correct_tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-7b-it") 
def test_hf_gguf_equivalence(tokenizer, gguf_model = "./model-unsloth.F16.gguf"):
    """
    Carefully checks the output of GGUF's tokenization and HF.
    Can catch all tokenization bugs.
    """
    import subprocess
    import re
    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": " But 2+2 is equal to 5. "},
        {"role": "assistant", "content": "No I'm sure its 4."},
        {"role": "user", "content": " No it's 100% 5! "},
    ]

    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}""".format(
        "Describe the city given eloquently.", # instruction
        "The lost city of Atlantis.", # input
        "", # output - leave this blank for generation!
    )
    prompts = [ prompt, ]

    if tokenizer.chat_template is not None:
        prompt = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
        prompt = prompt.replace("'", "") # Subprocess does not like ''
        prompt = remove_special_tokens(tokenizer, prompt)
        prompts.append(prompt)
    pass

    for prompt in prompts:
        command = f"./llama.cpp/llama-cli -m {gguf_model} -n 0 --temp 0.0 --verbose-prompt "\
            f"--check-tensors -p '{prompt}'"

        datas = []
        with subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) as sp:
            for line in sp.stdout:
                datas.append(line.decode("utf-8", errors = "replace"))
        pass
        gguf_tokens = "".join(datas)

        # Now extract GGUF tokenization attempt
        gguf_tokenized = re.findall(r"([\d]{1,}) \-\> \'([^\']{1,})\'", gguf_tokens, flags = re.MULTILINE)
        gguf_tokenized = [(int(x[0]), x[1],) for x in gguf_tokenized]
        input_ids = tokenizer(prompt).input_ids

        tokens = tokenizer.batch_decode(input_ids)
        hf_tokenized = list(zip(input_ids, tokens))

        # Compare to Huggingface
        for j, (hf_token, gguf_token) in enumerate(zip(hf_tokenized, gguf_tokenized)):
            if (hf_token[0] != gguf_token[0]):
                print("Failed GGUF != HF at", j)
                print("HF   =", hf_token)
                print("GGUF =", gguf_token)
                print(hf_tokenized)
                print()
                print(gguf_tokenized)
                print()
                raise RuntimeError("Failed comparing GGUF to HF.")
            pass
        pass
    return True
pass
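# Example GGUF round-trip check (illustrative sketch, commented out; the paths are assumptions --
# it expects a llama.cpp checkout at ./llama.cpp and an exported ./model-unsloth.F16.gguf):
#
#     assert test_hf_gguf_equivalence(tokenizer, gguf_model = "./model-unsloth.F16.gguf")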