# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __all__ = [ "get_chat_template", "test_chat_templates", "test_hf_gguf_equivalence", "remove_special_tokens", "to_sharegpt", "standardize_sharegpt", "apply_chat_template", "train_on_responses_only", "test_construct_chat_template", ] from transformers import StoppingCriteria, StoppingCriteriaList from torch import LongTensor, FloatTensor from transformers.models.llama.modeling_llama import logger from .save import patch_saving_functions import os import shutil from .tokenizer_utils import * from .models._utils import patch_tokenizer import re from unsloth_zoo.dataset_utils import ( train_on_responses_only, ) CHAT_TEMPLATES = {} DEFAULT_SYSTEM_MESSAGE = {} # =========================================== Unsloth # Unsloth efficient template leverages from Zephyr unsloth_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '>>> User: ' + message['content'] + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '>>> Assistant: ' }}"\ "{% endif %}" pass unsloth_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}>>> User: {{ .Prompt }} {{ end }}>>> Assistant: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """You are a helpful assistant to the user""" ''' unsloth_eos_token = "eos_token" CHAT_TEMPLATES["unsloth"] = (unsloth_template, unsloth_eos_token, False, unsloth_ollama,) DEFAULT_SYSTEM_MESSAGE["unsloth"] = "You are a helpful assistant to the user" pass # =========================================== Zephyr # Zephyr has no BOS! 
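# --------------------------------------------------------------------------
# Illustrative sketch only (not executed on import): each CHAT_TEMPLATES entry
# above is a tuple of (jinja_template, stop_word, yes_map_eos_token,
# ollama_modelfile) and is consumed by get_chat_template() defined further
# below. The model name and the ShareGPT-style mapping here are arbitrary
# examples, not requirements.
def _example_registry_usage():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("unsloth/mistral-7b-bnb-4bit")  # assumed model name
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "chatml",  # any key of CHAT_TEMPLATES
        mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"},
        map_eos_token = True,      # remaps the tokenizer's EOS to <|im_end|>
    )
    messages = [
        {"from" : "human", "value" : "What is 2+2?"},
        {"from" : "gpt",   "value" : "It's 4."},
        {"from" : "human", "value" : "And 3+3?"},
    ]
    return tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)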
zephyr_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '<|user|>\n' + message['content'] + eos_token + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ '<|system|>\n' + message['content'] + eos_token + '\n' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|assistant|>\n' }}"\ "{% endif %}" pass zephyr_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|system|> {{ .System }}{__EOS_TOKEN__} {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }}{__EOS_TOKEN__} {{ end }}<|assistant|> {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' zephyr_eos_token = "eos_token" CHAT_TEMPLATES["zephyr"] = (zephyr_template, zephyr_eos_token, False, zephyr_ollama,) DEFAULT_SYSTEM_MESSAGE["zephyr"] = None # No system message in Zephyr pass # =========================================== ChatML # ChatML has no BOS and not EOS! Rather <|im_start|> and <|im_end|> acts as BOS / EOS. chatml_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}"\ "{% elif message['role'] == 'assistant' %}"\ "{{'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' }}"\ "{% else %}"\ "{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|im_start|>assistant\n' }}"\ "{% endif %}" pass chatml_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant {{ .Response }}<|im_end|> """ PARAMETER stop "<|im_start|>" PARAMETER stop "<|im_end|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' chatml_eos_token = "<|im_end|>" CHAT_TEMPLATES["chatml"] = (chatml_template, chatml_eos_token, True, chatml_ollama,) DEFAULT_SYSTEM_MESSAGE["chatml"] = None # No system message in ChatML pass # =========================================== Mistral-1 # Mistral Instruct doesn't allow system prompts, so we append it to the user message. 
mistral_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' ' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '[INST] ' + message['content'] + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"
pass

# Ollama from https://www.ollama.com/library/mistral
mistral_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST]"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

mistral_eos_token = "eos_token"
CHAT_TEMPLATES["mistral"] = (mistral_template, mistral_eos_token, False, mistral_ollama,)
DEFAULT_SYSTEM_MESSAGE["mistral"] = None # No system message in Mistral
pass

# =========================================== Llama-2
# Adds BOS to every convo! And weird <<SYS>> <</SYS>> system messages.
llama_template = \
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['content'] + '\n<</SYS>>\n\n' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ bos_token + '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ ' ' + message['content'].strip() + ' ' + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"
pass

# Ollama from https://www.ollama.com/library/llama3
llama_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] <<SYS>>{{ .System }}<</SYS>>

{{ .Prompt }} [/INST]"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

llama_eos_token = "eos_token"
CHAT_TEMPLATES["llama"] = (llama_template, llama_eos_token, False, llama_ollama,)
DEFAULT_SYSTEM_MESSAGE["llama"] = None # No system message in Llama
pass

# =========================================== Vicuna
# https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template
vicuna_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{ messages[0]['content'] + ' ' }}"\
        "{% set loop_messages = messages[1:] %}"\
    "{% else %}"\
        "{{ '{system_message}' + ' ' }}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ 'USER: ' + message['content'] + ' ' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ 'ASSISTANT: ' + message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ 'ASSISTANT:' }}"\
    "{% endif %}"
pass

# Ollama from https://www.ollama.com/library/vicuna
vicuna_ollama = \
'''
FROM
{__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}USER: {{ .Prompt }} {{ end }}ASSISTANT: {{ .Response }} {__EOS_TOKEN__}""" PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' vicuna_eos_token = "eos_token" CHAT_TEMPLATES["vicuna"] = (vicuna_template, vicuna_eos_token, False, vicuna_ollama,) DEFAULT_SYSTEM_MESSAGE["vicuna"] = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions." pass # =========================================== Vicuna Old # https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template vicuna_old_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '### Human: ' + message['content'] + '\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '### Assistant: ' + message['content'] + eos_token + '\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '### Assistant:' }}"\ "{% endif %}" pass vicuna_old_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Human: {{ .Prompt }} {{ end }}### Assistant: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.""" ''' vicuna_old_eos_token = "eos_token" CHAT_TEMPLATES["vicuna_old"] = (vicuna_old_template, vicuna_old_eos_token, False, vicuna_old_ollama,) DEFAULT_SYSTEM_MESSAGE["vicuna_old"] = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human\\'s questions." CHAT_TEMPLATES["vicuna old"] = CHAT_TEMPLATES["vicuna_old"] DEFAULT_SYSTEM_MESSAGE["vicuna old"] = DEFAULT_SYSTEM_MESSAGE["vicuna_old"] pass # =========================================== Alpaca multi turn # https://github.com/tatsu-lab/stanford_alpaca Changed for multi-turn convos alpaca_template = \ "{{ bos_token }}"\ "{% if messages[0]['role'] == 'system' %}"\ "{{ messages[0]['content'] + '\n\n' }}"\ "{% set loop_messages = messages[1:] %}"\ "{% else %}"\ "{{ '{system_message}' + '\n\n' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}"\ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '### Instruction:\n' + message['content'] + '\n\n' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '### Response:\n' + message['content'] + eos_token + '\n\n' }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '### Response:\n' }}"\ "{% endif %}" pass alpaca_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}### Instruction: {{ .Prompt }}{{ end }} ### Response: {{ .Response }}{__EOS_TOKEN__} """ PARAMETER stop "{__EOS_TOKEN__}" PARAMETER temperature 1.5 PARAMETER min_p 0.1 SYSTEM """Below are some instructions that describe some tasks. 
Write responses that appropriately complete each request."""
'''

alpaca_eos_token = "eos_token"
CHAT_TEMPLATES["alpaca"] = (alpaca_template, alpaca_eos_token, False, alpaca_ollama,)
DEFAULT_SYSTEM_MESSAGE["alpaca"] = "Below are some instructions that describe some tasks. Write responses that appropriately complete each request."
pass

# =========================================== Gemma
# https://huggingface.co/google/gemma-7b-it
# Notice we must use |trim for lstrip and rstrip. <start_of_turn> maps to 106.
# <end_of_turn> maps to 107. user and model are normal 1 word tokens.
gemma_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{'<start_of_turn>user\n' + messages[0]['content'] | trim + ' ' + messages[1]['content'] | trim + '<end_of_turn>\n'}}"\
        "{% set messages = messages[2:] %}"\
    "{% endif %}"\
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{'<start_of_turn>user\n' + message['content'] | trim + '<end_of_turn>\n'}}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{'<start_of_turn>model\n' + message['content'] | trim + '<end_of_turn>\n' }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '<start_of_turn>model\n' }}"\
    "{% endif %}"
pass

# Ollama from https://www.ollama.com/library/gemma
gemma_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """<start_of_turn>user
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<start_of_turn>"
PARAMETER stop "<end_of_turn>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

gemma_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma"] = (gemma_template, gemma_eos_token, True, gemma_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma"] = None # No system message in Gemma
pass

# =========================================== Gemma with ChatML instead
# We find using <eos> is still more appropriate!
gemma_chatml_template = "{{ bos_token }}" + chatml_template
pass

gemma_chatml_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

gemma_chatml_eos_token = (
    {"<start_of_turn>" : "<|im_start|>", "<end_of_turn>" : "<|im_end|>"},
    "<|im_end|>",
)
CHAT_TEMPLATES["gemma_chatml"] = (gemma_chatml_template, gemma_chatml_eos_token, True, gemma_chatml_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma_chatml"] = None # No system message in Gemma
pass

# =========================================== Gemma 2
# Same as Gemma 1, but with sliding window attention!
# https://ollama.com/library/gemma2/blobs/6522ca797f47
gemma2_template = gemma_template
gemma2_ollama = gemma_ollama + "PARAMETER num_ctx 4096\n"
gemma2_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma2"] = (gemma2_template, gemma2_eos_token, True, gemma2_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma2"] = None # No system message in Gemma 2

# =========================================== Gemma 2 with ChatML instead
gemma2_chatml_template = gemma_chatml_template
gemma2_chatml_ollama = gemma_chatml_ollama + "PARAMETER num_ctx 4096\n"
gemma2_chatml_eos_token = gemma_chatml_eos_token
CHAT_TEMPLATES["gemma2_chatml"] = (gemma2_chatml_template, gemma2_chatml_eos_token, True, gemma2_chatml_ollama,)
DEFAULT_SYSTEM_MESSAGE["gemma2_chatml"] = None # No system message in Gemma 2
pass

# =========================================== Llama-3
# Weirdly \n\n is needed?
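# Illustrative sketch only: applying the "llama-3" template registered just
# below via get_chat_template(). The model name is an arbitrary example.
def _example_llama3_usage():
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-Instruct")  # assumed model name
    tokenizer = get_chat_template(tokenizer, chat_template = "llama-3")
    messages = [{"role" : "user", "content" : "Hello!"}]
    # add_generation_prompt appends the assistant header so generation can start
    return tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)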
llama3_template = \ "{{ bos_token }}"\ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% else %}"\ "{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"\ "{% endif %}" pass # Ollama from https://www.ollama.com/library/llama3 llama3_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> {{ .Response }}<|eot_id|>""" PARAMETER stop "<|start_header_id|>" PARAMETER stop "<|end_header_id|>" PARAMETER stop "<|eot_id|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' llama3_template_eos_token = "eos_token" CHAT_TEMPLATES["llama-3"] = (llama3_template, llama3_template_eos_token, False, llama3_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-3"] = None # No system message in Llama-3 CHAT_TEMPLATES["llama3"] = (llama3_template, llama3_template_eos_token, False, llama3_ollama,) DEFAULT_SYSTEM_MESSAGE["llama3"] = None # No system message in Llama-3 pass # =========================================== Phi-3 # "{{ bos_token }}"\ # Phi-3.5 removes BOS? phi3_template = \ "{% for message in messages %}"\ "{% if message['role'] == 'user' %}"\ "{{'<|user|>\n' + message['content'] + '<|end|>\n'}}"\ "{% elif message['role'] == 'assistant' %}"\ "{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}"\ "{% else %}"\ "{{'<|' + message['role'] + '|>\n' + message['content'] + '<|end|>\n'}}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '<|assistant|>\n' }}"\ "{% endif %}" pass # Ollama from https://www.ollama.com/library/phi3 phi3_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .System }}<|system|> {{ .System }}<|end|> {{ end }}{{ if .Prompt }}<|user|> {{ .Prompt }}<|end|> {{ end }}<|assistant|> {{ .Response }}<|end|> """ PARAMETER stop "<|end|>" PARAMETER stop "<|user|>" PARAMETER stop "<|assistant|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' phi3_template_eos_token = "<|end|>" CHAT_TEMPLATES["phi-3"] = (phi3_template, phi3_template_eos_token, False, phi3_ollama,) DEFAULT_SYSTEM_MESSAGE["phi-3"] = None # No system message in Phi-3 CHAT_TEMPLATES["phi-35"] = CHAT_TEMPLATES["phi-3"] DEFAULT_SYSTEM_MESSAGE["phi-35"] = None # No system message in Phi-3.5 CHAT_TEMPLATES["phi-3.5"] = CHAT_TEMPLATES["phi-3"] DEFAULT_SYSTEM_MESSAGE["phi-3.5"] = None # No system message in Phi-3.5 pass # =========================================== Llama-3.1 """ No trimming in Llama 3.1 Instruct! 
Also an extra newline for Cutting Knowledge Date See https://colab.research.google.com/drive/1Xpqq5xpIgO-B00MQ-UccYMwN2J8QFgBM?usp=sharing Also should be import datetime tokenizer.apply_chat_template( messages, add_generation_prompt = True, tokenize = False, date_string = datetime.today().strftime("%d %B %Y")), ) """ llama31_template = \ """{{- bos_token }} {%- if custom_tools is defined %} {%- set tools = custom_tools %} {%- endif %} {%- if not tools_in_user_message is defined %} {%- set tools_in_user_message = true %} {%- endif %} {%- if not date_string is defined %} {%- set date_string = "26 July 2024" %} {%- endif %} {%- if not tools is defined %} {%- set tools = none %} {%- endif %} {#- This block extracts the system message, so we can slot it into the right place. #} {%- if messages[0]['role'] == 'system' %} {%- set system_message = messages[0]['content'] %} {%- set messages = messages[1:] %} {%- else %} {%- set system_message = "{system_message}" %} {%- endif %} {#- System message + builtin tools #} {{- "<|start_header_id|>system<|end_header_id|>\n\n" }} {%- if builtin_tools is defined or tools is not none %} {{- "Environment: ipython\n" }} {%- endif %} {%- if builtin_tools is defined %} {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} {%- endif %} {{- "Cutting Knowledge Date: December 2023\n" }} {{- "Today Date: " + date_string + "\n\n" }} {%- if tools is not none and not tools_in_user_message %} {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} {{- "Do not use variables.\n\n" }} {%- for t in tools %} {{- t | tojson(indent=4) }} {{- "\n\n" }} {%- endfor %} {%- endif %} {{- system_message }} {{- "<|eot_id|>" }} {#- Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message and not tools is none %} {#- Extract the first user message so we can plug it in here #} {%- if messages | length != 0 %} {%- set first_user_message = messages[0]['content'] %} {%- set messages = messages[1:] %} {%- else %} {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} {%- endif %} {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} {{- "Given the following functions, please respond with a JSON for a function call " }} {{- "with its proper arguments that best answers the given prompt.\n\n" }} {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} {{- "Do not use variables.\n\n" }} {%- for t in tools %} {{- t | tojson(indent=4) }} {{- "\n\n" }} {%- endfor %} {{- first_user_message + "<|eot_id|>"}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }} {%- elif 'tool_calls' in message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception("This model only supports single tool-calls at once!") }} {%- endif %} {%- set tool_call = message.tool_calls[0].function %} {%- if builtin_tools is defined and tool_call.name in builtin_tools %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} {{- "<|python_tag|>" + tool_call.name + ".call(" }} {%- for arg_name, arg_val in tool_call.arguments | items %} {{- arg_name + '="' + arg_val + '"' }} {%- if not loop.last %} {{- ", " }} {%- endif %} {%- endfor %} {{- ")" }} {%- else %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} {{- '{"name": "' + tool_call.name + '", ' }} {{- '"parameters": ' }} {{- tool_call.arguments | tojson }} {{- "}" }} {%- endif %} {%- if builtin_tools is defined %} {#- This means we're in ipython mode #} {{- "<|eom_id|>" }} {%- else %} {{- "<|eot_id|>" }} {%- endif %} {%- elif message.role == "tool" or message.role == "ipython" %} {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} {%- if message.content is mapping or message.content is iterable %} {{- message.content | tojson }} {%- else %} {{- message.content }} {%- endif %} {{- "<|eot_id|>" }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} {%- endif %} """ pass # Ollama from https://ollama.com/library/llama3.1 (needs updating!) llama31_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{ if .Messages }} {{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|> {{- if .System }} {{ .System }} {{- end }} {{- if .Tools }} You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original use question. {{- end }} {{- end }}<|eot_id|> {{- range $i, $_ := .Messages }} {{- $last := eq (len (slice $.Messages $i)) 1 }} {{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|> {{- if and $.Tools $last }} Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables. 
{{ $.Tools }} {{- end }} {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|> {{ end }} {{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|> {{- if .ToolCalls }} {{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }} {{- else }} {{ .Content }}{{ if not $last }}<|eot_id|>{{ end }} {{- end }} {{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|> {{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|> {{ end }} {{- end }} {{- end }} {{- else }} {{- if .System }}<|start_header_id|>system<|end_header_id|> {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|> {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|> {{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}""" PARAMETER stop "<|start_header_id|>" PARAMETER stop "<|end_header_id|>" PARAMETER stop "<|eot_id|>" PARAMETER stop "<|eom_id|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' llama31_template_eos_token = "eos_token" CHAT_TEMPLATES["llama-3.1"] = (llama31_template, llama31_template_eos_token, False, llama31_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-3.1"] = "" # Llama3.1 default system message is empty + the dates CHAT_TEMPLATES["llama-31"] = (llama31_template, llama31_template_eos_token, False, llama31_ollama,) DEFAULT_SYSTEM_MESSAGE["llama-31"] = "" # Llama3.1 default system message is empty + the dates pass # =========================================== Qwen 2.5 qwen25_template = \ """{%- if tools %} {{- \'<|im_start|>system\\n\' }} {%- if messages[0][\'role\'] == \'system\' %} {{- messages[0][\'content\'] }} {%- else %} {{- \'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\' }} {%- endif %} {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n" }} {%- for tool in tools %} {{- "\\n" }} {{- tool | tojson }} {%- endfor %} {{- "\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\"name\\": , \\"arguments\\": }\\n<|im_end|>\\n" }}\n{%- else %} {%- if messages[0][\'role\'] == \'system\' %} {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }} {%- else %} {{- \'<|im_start|>system\\n{system_message}<|im_end|>\\n\' }} {%- endif %}\n{%- endif %}\n{%- for message in messages %} {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\' + \'\\n\' }} {%- elif message.role == "assistant" %} {{- \'<|im_start|>\' + message.role }} {%- if message.content %} {{- \'\\n\' + message.content }} {%- endif %} {%- for tool_call in message.tool_calls %} {%- if tool_call.function is defined %} {%- set tool_call = tool_call.function %} {%- endif %} {{- \'\\n\\n{"name": "\' }} {{- tool_call.name }} {{- \'", "arguments": \' }} {{- tool_call.arguments | tojson }} {{- \'}\\n\' }} {%- endfor %} {{- \'<|im_end|>\\n\' }} {%- elif message.role == "tool" %} {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} {{- \'<|im_start|>user\' }} {%- endif %} {{- \'\\n\\n\' }} {{- message.content }} {{- \'\\n\' }} {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} {{- \'<|im_end|>\\n\' }} {%- endif %} {%- endif %}\n{%- endfor %}\n{%- if 
add_generation_prompt %} {{- \'<|im_start|>assistant\\n\' }} {%- endif %} """ # Ollama from https://ollama.com/library/qwen2.5/blobs/eb4402837c78 qwen25_ollama = \ ''' FROM {__FILE_LOCATION__} TEMPLATE """{{- if .Messages }} {{- if or .System .Tools }}<|im_start|>system {{- if .System }} {{ .System }} {{- end }} {{- if .Tools }} # Tools You may call one or more functions to assist with the user query. You are provided with function signatures within XML tags: {{- range .Tools }} {"type": "function", "function": {{ .Function }}} {{- end }} For each function call, return a json object with function name and arguments within XML tags: {"name": , "arguments": } {{- end }}<|im_end|> {{ end }} {{- range $i, $_ := .Messages }} {{- $last := eq (len (slice $.Messages $i)) 1 -}} {{- if eq .Role "user" }}<|im_start|>user {{ .Content }}<|im_end|> {{ else if eq .Role "assistant" }}<|im_start|>assistant {{ if .Content }}{{ .Content }} {{- else if .ToolCalls }} {{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} {{ end }} {{- end }}{{ if not $last }}<|im_end|> {{ end }} {{- else if eq .Role "tool" }}<|im_start|>user {{ .Content }} <|im_end|> {{ end }} {{- if and (ne .Role "assistant") $last }}<|im_start|>assistant {{ end }} {{- end }} {{- else }} {{- if .System }}<|im_start|>system {{ .System }}<|im_end|> {{ end }}{{ if .Prompt }}<|im_start|>user {{ .Prompt }}<|im_end|> {{ end }}<|im_start|>assistant {{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}""" PARAMETER stop "<|im_end|>" PARAMETER stop "<|endoftext|>" PARAMETER temperature 1.5 PARAMETER min_p 0.1 ''' qwen25_template_eos_token = "eos_token" qwen25_default_system_message = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." CHAT_TEMPLATES["qwen-2.5"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen-2.5"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen-25"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen-25"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen25"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen25"] = qwen25_default_system_message # No system message in Qwen 2.5 CHAT_TEMPLATES["qwen2.5"] = (qwen25_template, qwen25_template_eos_token, False, qwen25_ollama,) DEFAULT_SYSTEM_MESSAGE["qwen2.5"] = qwen25_default_system_message # No system message in Qwen 2.5 pass def _change_system_message(template: str, type_chat_template: str, system_message: str = None): system_message_pattern = r"\{system_message\}" # For predefined templates, check if default system message exists default_system_message = DEFAULT_SYSTEM_MESSAGE.get(f"{type_chat_template}", None) if default_system_message is None: if system_message is not None: logger.warning_once( f"Unsloth: You tried to change the system message for {type_chat_template}, " "but it doesn't have a default system message. " "You need to manually add the system message in your data." 
) return template, system_message pass # For custom templates if type_chat_template is None: has_placeholder = re.search(system_message_pattern, template) is not None if has_placeholder: if system_message is None: raise ValueError("Unsloth: You need to provide a system message for custom templates.") new_template = re.sub(system_message_pattern, system_message, template) return new_template, system_message return template, system_message pass # For predefined templates with default system message message_to_use = system_message if system_message is not None else default_system_message new_template = re.sub(system_message_pattern, message_to_use, template) return new_template, message_to_use pass def get_chat_template( tokenizer, chat_template = "chatml", mapping = {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"}, map_eos_token = True, system_message = None, ): assert(type(map_eos_token) is bool) old_tokenizer = tokenizer IS_GEMMA = False if tokenizer.__class__.__name__.startswith("Gemma"): if chat_template == "chatml": chat_template = "gemma_chatml" IS_GEMMA = True pass # We add a check for Llama-3 # if chat_template == "llama-3": # tokenizer._using_llama3_template = True # else: # llama3_tokens = set(["<|end_header_id|>", "<|eot_id|>", "<|start_header_id|>"]) # check_llama3_tokens = llama3_tokens & set(str(x) for x in tokenizer.added_tokens_decoder.values()) # if len(check_llama3_tokens) == len(llama3_tokens): # tokenizer._using_llama3_template = True # pass # pass # We first check if the tokenizer is a fast one. If not, we cannot convert this! is_fast_tokenizer = getattr(tokenizer, "is_fast", False) old_padding_side = tokenizer.padding_side same_padding_token = False type_chat_template = None if type(chat_template) in (list, tuple,): # For changing system message later # Since it's not supported yet, we will raise an error first! type_chat_template = chat_template[0].lower() chat_template, stop_word = chat_template assert(type(chat_template) is str) assert(type(stop_word) is str) ollama_modelfile = None elif type(chat_template) is str: # For changing system message later type_chat_template = chat_template.lower() chat_template, stop_word, yes_map_eos_token, ollama_modelfile = CHAT_TEMPLATES[chat_template] # Check mapping to eos_token if not map_eos_token and yes_map_eos_token: map_eos_token = True if not yes_map_eos_token and map_eos_token: map_eos_token = False if type(stop_word) in (list, tuple,): token_mapping, stop_word = stop_word assert(type(token_mapping) is dict) else: token_mapping = None assert(type(stop_word) is str) # Check fast tokenizer if not is_fast_tokenizer: print( "Unsloth: Not a fast tokenizer, so can't process it as of yet :(\n"\ "Please log a Github issue if you want this as a new feature!\n"\ "Your chat template will still work, but it won't add or edit tokens." ) elif token_mapping is not None: # token_mapping = {"" : "<|im_start|>", "" : "<|im_end|>"} # For Gemma :) string_vocab = tokenizer._tokenizer.to_str() skipped = 0 for old_token, new_token in token_mapping.items(): old_count = string_vocab.count(f'"{old_token}"') new_count = string_vocab.count(f'"{new_token}"') if new_count != 0: print(f"{new_token} is already a token. 
Skipping.") skipped += 1 elif old_count == 0: raise RuntimeError(f"{old_token} was not part of the tokenizer!") else: string_vocab = string_vocab.replace(f'"{old_token}"', f'"{new_token}"') pass pass if map_eos_token and (not stop_word in token_mapping.values()): # Do not map 107 = <|im_end|> and 1 = <|im_end|>. This will reduce the vocab size by 1 logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.") string_vocab = string_vocab.replace(tokenizer.eos_token, stop_word) pass if skipped != len(token_mapping): new_tokenizer = tokenizer._tokenizer.from_str(string_vocab) # Careful on pad_token old_pad_token = tokenizer.pad_token if old_pad_token == tokenizer.eos_token: old_pad_token = stop_word same_padding_token = True pass if map_eos_token: new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, eos_token = stop_word, pad_token = old_pad_token, ) else: new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, pad_token = old_pad_token, ) pass # Must fix the sentence piece tokenizer since there's no tokenizer.model file! tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,) else: pass elif map_eos_token and (stop_word != "eos_token"): logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.") # Replaces the old EOS token with a new one. # Useful for ChatML <|im_end|> for example. # Usually we train 2 more tokens <|im_start|> and <|im_end|> # But training the lm_head and embeddings are slow! # This is a HACK! # Idea from https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser old_bos_token = getattr(tokenizer, "bos_token", None) old_eos_token = getattr(tokenizer, "eos_token", None) old_pad_token = getattr(tokenizer, "pad_token", None) old_unk_token = getattr(tokenizer, "unk_token", None) string_vocab = tokenizer._tokenizer.to_str() # First check if new stop_word is in the tokenizer if stop_word in string_vocab: # We shall swap them around temporary_stop_token = "<|:__TEMP//STOP//TOKEN__:|>" string_vocab = string_vocab.replace(old_eos_token, temporary_stop_token) string_vocab = string_vocab.replace(stop_word, old_eos_token) string_vocab = string_vocab.replace(temporary_stop_token, stop_word) else: string_vocab = string_vocab.replace(old_eos_token, stop_word) pass new_tokenizer = tokenizer._tokenizer.from_str(string_vocab) # Careful on pad_token if old_pad_token == old_eos_token: old_pad_token = stop_word same_padding_token = True pass new_tokenizer = tokenizer.__class__( tokenizer_object = new_tokenizer, bos_token = old_bos_token, eos_token = stop_word, unk_token = old_unk_token, pad_token = old_pad_token, ) # Must fix the sentence piece tokenizer since there's no tokenizer.model file! 
token_mapping = { old_eos_token : stop_word, } tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,) pass else: raise TypeError( f"Unsloth: `chat_template` must be a tuple of (your_template, eos_token,) or one of\n"\ f"{CHAT_TEMPLATES.keys()}" ) pass # Careful on Gemma # bos_token is a must or else losses become too high if IS_GEMMA and not chat_template.startswith(("{{ bos_token }}", "{{- bos_token }}")): chat_template = "{{ bos_token }}" + chat_template pass # For ShareGPT role -> from and content -> value new_chat_template = chat_template\ .replace("'role'", "'" + mapping["role"] + "'")\ .replace("'content'", "'" + mapping["content"] + "'")\ .replace("'user'", "'" + mapping["user"] + "'")\ .replace("'assistant'", "'" + mapping["assistant"] + "'") _, tokenizer = patch_tokenizer(model = None, tokenizer = tokenizer) tokenizer.padding_side = old_padding_side # If not normal HF, we add a check to make old templates work if mapping != {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"}: chat_template = \ "{% if 'role' in messages[0] %}" + \ chat_template + \ "{% else %}" + \ new_chat_template + \ "{% endif %}" else: chat_template = new_chat_template pass chat_template, system_message = _change_system_message(chat_template, type_chat_template, system_message) tokenizer.chat_template = chat_template # Also fix up other tokens old_pad_token = getattr(old_tokenizer, "pad_token", None) old_bos_token = getattr(old_tokenizer, "bos_token", None) old_unk_token = getattr(old_tokenizer, "unk_token", None) new_pad_token = getattr(tokenizer, "pad_token", None) new_bos_token = getattr(tokenizer, "bos_token", None) new_unk_token = getattr(tokenizer, "unk_token", None) if old_bos_token != new_bos_token: tokenizer.bos_token = old_bos_token if old_unk_token != new_unk_token: tokenizer.unk_token = old_unk_token if not same_padding_token: if old_pad_token != new_pad_token: tokenizer.pad_token = old_pad_token pass # stopping_criteria = create_stopping_criteria(tokenizer, stop_word) # Patch saving functions tokenizer = patch_saving_functions(tokenizer) # Add Ollama tokenizer._ollama_modelfile = ollama_modelfile tokenizer._system_message = system_message return tokenizer#, stopping_criteria pass def remove_special_tokens(tokenizer, prompt): # Removes double BOS token if prompt.startswith(tokenizer.bos_token): prompt = prompt[len(tokenizer.bos_token):] pass return prompt pass def _parse_combined_prompt(combined_prompt, dataset): # Find {...} possible_columns = re.findall(r"\{(.+?)\}", combined_prompt) dataset_columns = set(dataset.column_names) for column in possible_columns: if column not in dataset_columns: raise KeyError( f"Unsloth: Your prompt includes '{column}' but this does not exist in the dataset. 
"\ f"Only allowed columns are {list(dataset_columns)}" ) pass pass # Find [[...]] optional_prompts = list(re.finditer(r"\[\[.+?\]\]", combined_prompt, flags = re.DOTALL | re.MULTILINE)) optional_prompts = [(x.span(), x.group(0)) for x in optional_prompts] final_optional_prompts = [] if len(optional_prompts) != 0: # Add left left = optional_prompts[0] l = left[0][0] if l != 0: final_optional_prompts.append(combined_prompt[:l]) # Add in between for left, right in zip(optional_prompts[:-1], optional_prompts[1:]): l, r = left[0][-1], right[0][0] final_optional_prompts.append(left) if l != r: final_optional_prompts.append(combined_prompt[l : r]) pass final_optional_prompts.append(optional_prompts[-1]) # Add right right = optional_prompts[-1] r = right[0][1] if r != len(combined_prompt): final_optional_prompts.append(combined_prompt[r:]) else: # Just add in the entire string final_optional_prompts.append(combined_prompt) pass check_combined = "".join(x if type(x) is str else x[1] for x in final_optional_prompts) assert(combined_prompt == check_combined) return possible_columns, final_optional_prompts pass def _create_formatter(possible_columns, final_optional_prompts, user_column_name): # Start final prompt! function = ["def __combined_prompt_processor__(examples):"] columns = list(set(possible_columns)) for column in columns: function.append(f"{' '*4}{column}__ = examples['{column}']") function.append(f"{' '*4}texts = []") function.append(f"{' '*4}for ({', '.join(columns)}) in zip({', '.join(f'{x}__' for x in columns)}):") # Add optional tags as well! final_prompt = "" formatter = [] for j, optional_prompt in enumerate(final_optional_prompts): if type(optional_prompt) is str: columns = re.findall(r"\{(.+?)\}", optional_prompt) formatter += columns # Must escape \n \r final_prompt += optional_prompt.encode("unicode-escape").decode("utf-8").replace("'", "\\'").replace('"', '\\"') else: where, prompt = optional_prompt # Strip [[...]] # Must escape \n \r prompt = prompt[2:-2].encode("unicode-escape").decode("utf-8").replace("'", "\\'").replace('"', '\\"') columns = re.findall(r"\{(.+?)\}", prompt) x = f"__optional_{j}__" prompt = f"{' '*8}{x} = '{prompt}'.format({', '.join(f'{x} = {x}' for x in columns)}) if {columns[0]} else ''" function.append(prompt) formatter.append(x) final_prompt += "{" + x + "}" pass pass function.insert(1, f"{' '*4}__combined_prompt__ = '{final_prompt}'") function.append(f"{' '*8}texts.append("\ f"__combined_prompt__.format({', '.join(f'{x} = {x}' for x in formatter)}))") function.append(f"{' '*4}return " + "{ " + f"'{user_column_name}' : texts" + " }") return "\n".join(function) pass def to_sharegpt( dataset, merged_prompt = "", merged_column_name = "instruction", output_column_name = "output", remove_unused_columns = True, conversation_extension = 1, random_state = 3407, ): """ Converts a dataset to ShareGPT style. ShareGPT requires only 1 input and 1 output field. This means one has to merge multiple columns into 1 for 1 input field. Use `conversation_extension` to increase the length of each conversation by randomnly selecting a few and packing them into 1. 
merged_prompt = "", Prompt to merge columns into 1 input merged_column_name = "instruction", Final column name for the input field output_column_name = "output", Final column name for the output field remove_unused_columns = True, conversation_extension = 1, Automatically combines `conversation_extension` convos into 1 random_state = 3407, """ if "conversations" in dataset.column_names: convo = dataset[0]["conversations"] if type(convo) is list: raise TypeError("Unsloth: Your dataset is probably already in ShareGPT format!") pass pass possible_columns, final_optional_prompts = _parse_combined_prompt(merged_prompt, dataset) function = _create_formatter(possible_columns, final_optional_prompts, merged_column_name) exec(function, globals()) dataset = dataset.map(__combined_prompt_processor__, batched = True, desc = "Merging columns") def __convert_to_sharegpt__(examples): users = examples[merged_column_name] assistants = examples[output_column_name] texts = [ [ {"from" : "human", "value" : str(user) }, {"from" : "gpt", "value" : str(assistant)}, ] \ for user, assistant in zip(users, assistants) ] return { "conversations" : texts, } pass dataset = dataset.map( __convert_to_sharegpt__, batched = True, desc = "Converting to ShareGPT", # Remove unused columns! remove_columns = dataset.column_names if remove_unused_columns else None, ) # Randomnly concat conversations to create a long stream! from datasets import concatenate_datasets n_extensions = max(conversation_extension-1, 0) if n_extensions == 0: return dataset dataset = dataset.rename_columns({"conversations" : "conversations0"}) all_shuffled = [dataset] for j in range(1, n_extensions+1): shuffled = dataset.shuffle(seed = random_state+j).rename_columns({"conversations0" : f"conversations{j}"}) all_shuffled.append(shuffled) pass dataset = concatenate_datasets(all_shuffled, axis = 1) # Combine them into 1 function = "def __combine_conversations__(examples):\n" n_extensions += 1 for j in range(n_extensions): function += f"{' '*4}conversations{j}__ = examples['conversations{j}']\n" function += f"{' '*4}convos = []\n" function += f"{' '*4}for ({', '.join(f'conversations{j}' for j in range(n_extensions))}) "\ f"in zip({', '.join(f'conversations{j}__' for j in range(n_extensions))}):\n" function += f"{' '*8}convos.append("\ f"{'+'.join(f'conversations{j}' for j in range(n_extensions))})\n" function += f"{' '*4}return " + "{ " + "'conversations' : convos" + " }" # Map function exec(function, globals()) dataset = dataset.map( __combine_conversations__, batched = True, desc = "Extending conversations", # Remove unused columns! remove_columns = dataset.column_names if remove_unused_columns else None, ) return dataset pass def standardize_sharegpt( dataset, aliases_for_system = ["system",], aliases_for_user = ["user", "human", "input",], aliases_for_assistant = ["gpt", "assistant", "output",], ): """ Standardizes ShareGPT and other formats to user/assistant Hugging Face format. Get aliases for the system, user and assistant roles. These shall map to "system", "user" and "assistant" respectively. 
aliases_for_system = ["system",], aliases_for_user = ["user", "human", "input",], aliases_for_assistant = ["gpt", "assistant", "output",], """ import collections import itertools convos = dataset[:10]["conversations"] uniques = collections.defaultdict(list) for convo in convos: for message in convo: for key, value in message.items(): uniques[key].append(value) pass # Must be only 2 entries assert(len(uniques.keys()) == 2) keys = list(uniques.keys()) length_first = len(set(uniques[keys[0]])) length_second = len(set(uniques[keys[1]])) if length_first < length_second: # Role is assigned to the first element role_key = keys[0] content_key = keys[1] else: role_key = keys[1] content_key = keys[0] pass # Check roles are in aliases all_aliases = set(aliases_for_system + aliases_for_user + aliases_for_assistant) roles = set(uniques[role_key]) leftover_aliases = (all_aliases | roles) - all_aliases if len(leftover_aliases) != 0: raise TypeError( f"Unsloth: {list(leftover_aliases)} are not in aliases. Please update aliases." ) pass # Mapping for aliases aliases_mapping = {} for x in aliases_for_system: aliases_mapping[x] = "system" for x in aliases_for_user: aliases_mapping[x] = "user" for x in aliases_for_assistant: aliases_mapping[x] = "assistant" def _standardize_dataset(examples): convos = examples["conversations"] all_convos = [] for convo in convos: new_convo = [ { "role" : aliases_mapping[message[role_key]], "content" : message[content_key], } for message in convo ] all_convos.append(new_convo) pass return { "conversations" : all_convos, } pass return dataset.map(_standardize_dataset, batched = True, desc = "Standardizing format") pass def get_ollama_eos_tokens(tokenizer, extra_eos_tokens = []): added_tokens_decoder = tokenizer.added_tokens_decoder.values() added_tokens_decoder = [str(x) for x in added_tokens_decoder] # Remove added_tokens_decoder duplicates added_tokens_decoder = list(set(added_tokens_decoder) - set(extra_eos_tokens)) # Remove BOS if getattr(tokenizer, "bos_token", None) is not None: added_tokens_decoder = [x for x in added_tokens_decoder if x != tokenizer.bos_token] pass repeatted_tokens = [] # Join all vocab joined_text = "\x01\x00".join(added_tokens_decoder) for token in added_tokens_decoder: n = len(token) repeatted_counts = joined_text.count(token[:n//2]) # Try finding longer than 1/2 of the token in the rest # For eg <|reserved_special_token_0|>, <|reserved_special_token_1|> if repeatted_counts > 2: for j in range(n//2+1, n): if joined_text.count(token[:j]) < repeatted_counts: j -= 1 # Remove repeatted tokens to reduce search space joined_text = joined_text.replace(token[:j], "") repeatted_tokens.append(token[:j]) break pass pass pass # Remove duplicates splitted = joined_text.split("\x01\x00") final_eos_tokens = [] for old, new in zip(added_tokens_decoder, splitted): if old == new: final_eos_tokens.append(old) pass final_eos_tokens += extra_eos_tokens final_eos_tokens += repeatted_tokens # Remove new lines, spaces and HTML tags filtered_eos_tokens = [] for token in final_eos_tokens: if token.count("\n") == len(token): continue elif token.count("▁") == len(token): continue elif token.startswith("<") and len(token) <= 2: continue elif token.startswith("<|start_header_id|>system<|end_header_id|> {SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|>""", 
default_system_message = \ "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.", extra_eos_tokens = None, ): """ Creates a Ollama modelfile and a HF Jinja template from a custom template. You must provide 2x examples of an input & output. There is an optional system message as well. You must use {INPUT}, {OUTPUT} twice, and {SYSTEM} is optional. """ # Strip only the left chat_template = chat_template.lstrip() assert(tokenizer is not None) if extra_eos_tokens is None: extra_eos_tokens = [] elif type(extra_eos_tokens) is str: extra_eos_tokens = [extra_eos_tokens,] vocab = tokenizer.get_vocab() for extra_eos in extra_eos_tokens: assert(type(extra_eos) is str) if extra_eos not in vocab: raise ValueError(f"Unsloth: `{extra_eos}` is not a singular token in the tokenizer.") pass pass error_msg = \ "Unsloth: Your prompt template must have 2 examples showing the user input {INPUT} "\ "and the assistant output {OUTPUT}\n\n"\ "For example what is not allowed is just:\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n\n\n"\ "What is required is 2x of this:\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n"\ "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n" # Check for EOS after {OUTPUT} if tokenizer.eos_token is not None: extra_eos_tokens.insert(0, tokenizer.eos_token) if len(extra_eos_tokens) == 0: raise RuntimeError( "Unsloth: Your tokenizer does not have an EOS token? Please provide one via extra_eos_tokens!" ) pass # Check tokenizer types tokenizer_name = tokenizer.name_or_path.lower() if tokenizer_name.startswith(("unsloth/llama-3-8b-instruct", "unsloth/llama-3-70b-instruct")): # Add <|eot_id|> extra_eos_tokens.append("<|eot_id|>") elif ("<|eot_id|>" in extra_eos_tokens or "<|eot_id|>" in chat_template) and \ tokenizer_name.startswith(("unsloth/llama-3-8b", "unsloth/llama-3-70b")): # Warn logger.warning( "Unsloth: Base llama-3 models did not train <|eot_id|>.\n"\ "Please use the instruct version or use <|end_of_text|>" ) pass extra_eos_tokens = list(set(extra_eos_tokens)) count_eos = 0 for eos in extra_eos_tokens: count_eos += len(re.findall(r"{OUTPUT}" + re.escape(eos), chat_template)) pass # This forces you to provide 2 input and outputs final_combined_check = False try: # O(N^2) search finding 2 repeatted pieces of text j = len(chat_template)-1 at_least_one = False while j > 0: found = chat_template.rfind(chat_template[j:], 0, j) if found == -1: break j -= 1 at_least_one = True pass if j > 0: j += 1 else: raise RuntimeError(error_msg) if not at_least_one: raise RuntimeError(error_msg) # Must be equivalent to left final_combined_check = True # Repeatted text instruction_response = chat_template[j:] if instruction_response.count("{INPUT}") != 1 or instruction_response.count("{OUTPUT}") != 1: raise RuntimeError(error_msg) pass # 1st System, Instruction, Output pair left = chat_template[:j] # 2nd Instruction, Output pair right = chat_template[j:] final_combined_check = left if final_combined_check else chat_template # Isolate input extra_eos_tokens_regex = "|".join(f"(?:{re.escape(x)})" for x in extra_eos_tokens) if len(extra_eos_tokens_regex) != 0: find_end = f"(?:{extra_eos_tokens_regex})?" 
else: find_end = "" find_end = r"\{INPUT\}[\s\n]{0,}" + find_end input_end = list(re.finditer(find_end, right)) assert(len(input_end) == 1) input_end = input_end[0] input_end = input_end.span(0)[1] input_part = right[:input_end] # Isolate output output_part = right[input_end:] # Isolate system where_system = left.find(input_part) system_part = left[:where_system if where_system != -1 else len(left)] # Check if the user provided a correct prompt combined = system_part + input_part + output_part if combined != final_combined_check: combined_changed = combined .replace('\n', '\\n') left_changed = final_combined_check.replace('\n', '\\n') raise RuntimeError( "Unsloth: The prompt template you provided isn't correct. You gave:\n"\ f"{combined_changed}\n\n"\ "But we require the following:\n"\ f"{left_changed}" ) pass except: ending = chat_template[chat_template.find("{OUTPUT}") + len("{OUTPUT}"):] ending = re.escape(ending) find_text = "{INPUT}" + ending + "(.+?{OUTPUT}" + ending + ")" response_part = re.findall(find_text, chat_template, flags = re.DOTALL | re.MULTILINE) response_part = response_part[0] for j in range(1, len(response_part)): try_find = re.escape(response_part[:j]) try: found = next(re.finditer("(" + try_find + ").+?\{INPUT\}", chat_template, flags = re.DOTALL | re.MULTILINE)) except: break pass separator = found.group(1) response_start = chat_template.find(response_part) start_instruction = chat_template[:response_start].rfind(separator) if start_instruction == -1: start_instruction = 0 instruction_part = chat_template[start_instruction:response_start] combined = instruction_part + response_part where = chat_template.find(combined) system_part = chat_template[:where] system_part, input_part, output_part = system_part, instruction_part, response_part pass if count_eos == 0: logger.warning("Unsloth: We automatically added an EOS token to stop endless generations.") eos = extra_eos_tokens[0] output_part = output_part + eos pass # Ollama modelfile parts # Check bos_token is in system prompt ollama_system = system_part has_bos_token = False always_bos_token = False if tokenizer("A").input_ids[0] == getattr(tokenizer, "bos_token_id", None): always_bos_token = True if ollama_system.startswith(tokenizer.bos_token): has_bos_token = True ollama_system = ollama_system[len(tokenizer.bos_token):] pass pass # Check system if "{SYSTEM}" in ollama_system: system_modelfile = "{{ if .System }}" + ollama_system.replace("{SYSTEM}", "{{ .System }}") + "{{ end }}" else: system_modelfile = ollama_system pass input_modelfile = "{{ if .Prompt }}" + input_part .replace("{INPUT}", "{{ .Prompt }}") + "{{ end }}" output_modelfile = output_part.replace("{OUTPUT}", "{{ .Response }}") # Ollama EOS ollama_eos = get_ollama_eos_tokens(tokenizer, extra_eos_tokens) ollama_eos = '\n'.join(f'PARAMETER stop "{eos}"' for eos in ollama_eos) # Add temperature and min_p to counteract gibberish ollama_eos += "\nPARAMETER temperature 1.5\nPARAMETER min_p 0.1" # Ollama modelfile part = '"""' modelfile = 'FROM {__FILE_LOCATION__}\n\n'\ 'TEMPLATE ' + part + system_modelfile + input_modelfile + output_modelfile + \ part + '\n\n' + ollama_eos # HF Jinja Chat template def process(part, which, content = "message['content']"): if part.endswith(which): part = "'" + part[:part.find(which)] + f"' + {content}" elif part.startswith(which): part = f"{content} + '" + part[part.find(which):] + "'" else: part = "'" + part.replace(which, f"' + {content} + '") + "'" if part.startswith("'' + "): part = part[5:] return part pass input_jinja = 
process(input_part, "{INPUT}") output_jinja = process(output_part, "{OUTPUT}") pass jinja_template = \ "{% for message in loop_messages %}"\ "{% if message['role'] == 'user' %}"\ "{{ " + input_jinja + " }}"\ "{% elif message['role'] == 'assistant' %}"\ "{{ " + output_jinja + " }}"\ "{% else %}"\ "{{ raise_exception('Only user and assistant roles are supported!') }}"\ "{% endif %}"\ "{% endfor %}"\ "{% if add_generation_prompt %}"\ "{{ '" + output_part[:output_part.find("{OUTPUT}")] + "' }}"\ "{% endif %}" pass # Now add system prompt to jinja if len(system_part) != 0: partial_system = process(system_part, "{SYSTEM}", "messages[0]['content']") partial_system = partial_system.replace("{SYSTEM}", "") if "{SYSTEM}" in partial_system: if default_system_message is None: raise RuntimeError("Unsloth: Please specify a default system message!") pass # Separate the BOS if has_bos_token: partial_system = partial_system.replace(tokenizer.bos_token, "", 1) system_part = system_part .replace(tokenizer.bos_token, "", 1) pass partial_system = \ "{% if messages[0]['role'] == 'system' %}"\ "{{ " + partial_system + " }}"\ "{% set loop_messages = messages[1:] %}" if default_system_message is not None: full_system = system_part.replace("{SYSTEM}", default_system_message) if "{SYSTEM}" in system_part: modelfile += '\nSYSTEM "' + default_system_message + '"' pass partial_system += "{% else %}"\ "{{ '" + full_system + "' }}"\ "{% set loop_messages = messages %}"\ "{% endif %}" else: partial_system += "{% endif %}" pass jinja_template = partial_system + jinja_template if has_bos_token: jinja_template = "{{ bos_token }}" + jinja_template pass # Fix missing loop_messages if "{% set loop_messages = messages %}" not in jinja_template: jinja_template = jinja_template.replace( "{% for message in loop_messages %}", "{% for message in messages %}", 1, # Only replace the first one ) pass # Check if system part is the same! jinja_template = re.sub( r"\{\% if messages\[0\]\['role'\] \=\= 'system' \%\}\{\{ '(.+?)' \}\}"\ r"\{\% set loop\_messages \= messages\[1\:\] \%\}"\ r"\{\% else \%\}\{\{ '\1' \}\}\{\% set loop\_messages \= messages \%\}\{\% endif \%\}"\ r"\{\% for message in loop\_messages \%\}", r"{{ '\1' }}{% for message in messages %}", jinja_template, flags = re.MULTILINE | re.DOTALL, ) # Check jinja tempate for bos if always_bos_token: if not jinja_template.startswith(("{{ bos_token }}", "{{- bos_token }}")): jinja_template = "{{ bos_token }}" + jinja_template pass # Get instruction and output parts for train_on_inputs = False input_part = input_part [:input_part .find("{INPUT}")] output_part = output_part[:output_part.find("{OUTPUT}")] return modelfile, jinja_template, input_part, output_part pass def test_construct_chat_template(): token = "hf_" from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token = token) chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> {SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|> {INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {OUTPUT}<|eot_id|>""" default_system_message = \ "Below are some instructions that describe some tasks. Write responses that appropriately complete each request." 
def test_construct_chat_template():
    token = "hf_"
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token = token)

    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>"""

    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request."

    extra_eos_tokens = None

    modelfile, jinja_template, _, _ = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        default_system_message = default_system_message,
        extra_eos_tokens = extra_eos_tokens,
    )

    messages = [
        {"role": "system", "content": "You are an assistant"},
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": "Ok!"},
        {"role": "assistant", "content": "Anything else?"},
        {"role": "user", "content": "What's 2x2?"},
    ]

    correct_output = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)

    tokenizer.chat_template = jinja_template
    new_output = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
    assert(correct_output == new_output)
    pass
pass


def apply_chat_template(
    dataset,
    tokenizer = None,
    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>""",
    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.",
    extra_eos_tokens = None,
):
    """
    Creates an Ollama modelfile and a HF Jinja template from a custom template.
    You must provide 2x examples of an input & output.
    There is an optional system message as well.

    You must use {INPUT}, {OUTPUT} twice, and {SYSTEM} is optional.
    """
    modelfile, jinja_template, input_part, output_part = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        default_system_message = default_system_message,
        extra_eos_tokens = extra_eos_tokens,
    )

    def formatting_prompts_func(examples):
        convos = examples["conversations"]
        texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
        return { "text" : texts, }
    pass

    tokenizer.chat_template = jinja_template
    tokenizer._ollama_modelfile = modelfile
    tokenizer._unsloth_input_part  = input_part
    tokenizer._unsloth_output_part = output_part

    return dataset.map(formatting_prompts_func, batched = True,)
pass


def create_stopping_criteria(tokenizer, stop_word = "eos_token"):
    class StoppingCriteriaSub(StoppingCriteria):
        __slots__ = "stop_token", "single_match", "length",

        def __init__(self, stops = "eos_token", device = "cuda", encounters = 1):
            super().__init__()
            if stops == "eos_token":
                self.stop_token = torch.tensor(tokenizer.eos_token_id, device = "cuda")
                self.length = 1
            else:
                self.stop_token = tokenizer(["\n" + stops], add_special_tokens = False, return_tensors = "pt")
                self.stop_token = self.stop_token.input_ids.ravel()[1:].to("cuda")
                self.length = self.stop_token.shape[0]
            pass
            self.single_match = self.length == 1
        pass

        def __call__(self, input_ids: LongTensor, scores: FloatTensor) -> bool:
            input_ids = input_ids.ravel()
            last_token = input_ids[-1]
            if self.single_match and (last_token == self.stop_token): return True

            if input_ids.shape[0] >= self.length and \
                (input_ids[-self.length:] == self.stop_token).all(): return True
            return False
        pass
    pass
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops = stop_word)])
    return stopping_criteria
pass
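# Example usage sketch for create_stopping_criteria (commented out; `model` and the prompt
# below are assumptions for illustration, not part of this module):
#
#     stopping_criteria = create_stopping_criteria(tokenizer, stop_word = "eos_token")
#     output_ids = model.generate(
#         **tokenizer("Hello!", return_tensors = "pt").to("cuda"),
#         max_new_tokens = 64,
#         stopping_criteria = stopping_criteria,
#     )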
equal to 5. "}, {"role": "assistant", "content": "No I'm sure its 4."}, {"role": "user", "content": " No it's 100% 5! "}, ] # Zephyr from transformers import AutoTokenizer template = zephyr_template correct_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Chatml template = chatml_template correct_tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Mistral template = mistral_template correct_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") correct_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Llama template = llama_template correct_tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-2-7b-chat") correct_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) # Vicuna try: from fastchat.conversation import get_conv_template except: os.system("pip -qqq install git+https://github.com/lm-sys/FastChat.git") from fastchat.conversation import get_conv_template correct_prompt = get_conv_template("vicuna_v1.1") for j in range(len(messages)-1): correct_prompt.append_message(correct_prompt.roles[j%2==1], messages[j+1]["content"]) correct_prompt.append_message(correct_prompt.roles[1], "") correct_prompt = tokenizer.bos_token + correct_prompt.get_prompt() template = vicuna_template correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5") correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) assert(correct_prompt == our_prompt) try: from fastchat.conversation import get_conv_template except: os.system("pip -qqq install git+https://github.com/lm-sys/FastChat.git") from fastchat.conversation import get_conv_template correct_prompt = get_conv_template("zero_shot") for j in range(len(messages)-1): correct_prompt.append_message(correct_prompt.roles[j%2==1], messages[j+1]["content"]) correct_prompt.append_message(correct_prompt.roles[1], "") correct_prompt = tokenizer.bos_token + correct_prompt.get_prompt() template = vicuna_old_template correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5") correct_tokenizer.chat_template = template our_prompt = correct_tokenizer.apply_chat_template(messages[1:], tokenize = False, add_generation_prompt = True) # We add ourselves assert(correct_prompt == our_prompt.replace("", "")) # Gemma correct_tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-7b-it") 
def test_hf_gguf_equivalence(tokenizer, gguf_model = "./model-unsloth.F16.gguf"):
    """
    Carefully checks the output of GGUF's tokenization and HF.
    Can catch all tokenization bugs.
    """
    import subprocess
    import re
    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": " But 2+2 is equal to 5. "},
        {"role": "assistant", "content": "No I'm sure its 4."},
        {"role": "user", "content": " No it's 100% 5! "},
    ]

    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}""".format(
        "Describe the city given eloquently.", # instruction
        "The lost city of Atlantis.", # input
        "", # output - leave this blank for generation!
    )
    prompts = [ prompt, ]

    if tokenizer.chat_template is not None:
        prompt = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
        prompt = prompt.replace("'", "") # Subprocess does not like ''
        prompt = remove_special_tokens(tokenizer, prompt)
        prompts.append(prompt)
    pass

    for prompt in prompts:
        command = f"./llama.cpp/llama-cli -m {gguf_model} -n 0 --temp 0.0 --verbose-prompt "\
            f"--check-tensors -p '{prompt}'"

        datas = []
        with subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT, bufsize = 1) as sp:
            for line in sp.stdout:
                datas.append(line.decode("utf-8", errors = "replace"))
        pass
        gguf_tokens = "".join(datas)

        # Now extract GGUF tokenization attempt
        gguf_tokenized = re.findall(r"([\d]{1,}) \-\> \'([^\']{1,})\'", gguf_tokens, flags = re.MULTILINE)
        gguf_tokenized = [(int(x[0]), x[1],) for x in gguf_tokenized]
        input_ids = tokenizer(prompt).input_ids

        tokens = tokenizer.batch_decode(input_ids)
        hf_tokenized = list(zip(input_ids, tokens))

        # Compare to Huggingface
        for j, (hf_token, gguf_token) in enumerate(zip(hf_tokenized, gguf_tokenized)):
            if (hf_token[0] != gguf_token[0]):
                print("Failed GGUF != HF at", j)
                print("HF   =", hf_token)
                print("GGUF =", gguf_token)
                print(hf_tokenized)
                print()
                print(gguf_tokenized)
                print()
                raise RuntimeError("Failed comparing GGUF to HF.")
            pass
        pass
    return True
pass
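# Example GGUF round-trip check (illustrative sketch, commented out; the paths are assumptions --
# it expects a llama.cpp checkout at ./llama.cpp and an exported ./model-unsloth.F16.gguf):
#
#     assert test_hf_gguf_equivalence(tokenizer, gguf_model = "./model-unsloth.F16.gguf")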