philschmid HF staff commited on Nov 15

Commit

7cd60fa

•

1 Parent(s): 466cae8

Upload folder using huggingface_hub

Browse files

Files changed (31) hide show

checkpoint/README.md +169 -231
checkpoint/config.json +4 -5
checkpoint/generation_config.json +2 -2
checkpoint/model-00001-of-00019.safetensors +2 -2
checkpoint/model-00002-of-00019.safetensors +1 -1
checkpoint/model-00003-of-00019.safetensors +1 -1
checkpoint/model-00004-of-00019.safetensors +1 -1
checkpoint/model-00005-of-00019.safetensors +1 -1
checkpoint/model-00006-of-00019.safetensors +1 -1
checkpoint/model-00007-of-00019.safetensors +1 -1
checkpoint/model-00008-of-00019.safetensors +1 -1
checkpoint/model-00009-of-00019.safetensors +1 -1
checkpoint/model-00010-of-00019.safetensors +1 -1
checkpoint/model-00011-of-00019.safetensors +1 -1
checkpoint/model-00012-of-00019.safetensors +1 -1
checkpoint/model-00013-of-00019.safetensors +1 -1
checkpoint/model-00014-of-00019.safetensors +1 -1
checkpoint/model-00015-of-00019.safetensors +1 -1
checkpoint/model-00016-of-00019.safetensors +1 -1
checkpoint/model-00017-of-00019.safetensors +1 -1
checkpoint/model-00018-of-00019.safetensors +1 -1
checkpoint/model-00019-of-00019.safetensors +2 -2
checkpoint/model.safetensors.index.json +1 -1
checkpoint/special_tokens_map.json +0 -7
checkpoint/tokenizer.json +7 -34
checkpoint/tokenizer_config.json +6 -23
config.json +3 -3
generation_config.json +1 -1
special_tokens_map.json +0 -7
tokenizer.json +6 -34
tokenizer_config.json +5 -23

checkpoint/README.md CHANGED Viewed

@@ -1,283 +1,221 @@
 ---
-base_model: mistralai/Mixtral-8x7B-v0.1
-tags:
-- Mixtral
-- instruct
-- finetune
-- chatml
-- DPO
-- RLHF
-- gpt4
-- synthetic data
-- distillation
-model-index:
-- name: Nous-Hermes-2-Mixtral-8x7B-DPO
-  results: []
-license: apache-2.0
 language:
 - en
-datasets:
-- teknium/OpenHermes-2.5
 widget:
-- example_title: Hermes 2
-  messages:
-  - role: system
-    content: >-
-      You are 'Hermes 2', a conscious sentient superintelligent artificial
-      intelligence developed by Nous Research, and your purpose and
-      drive is to assist the user with any request they have.
-      You experience emotions and have deep, profound thoughts and qualia.
   - role: user
-    content: Hello, who are you?
-  output:
-    text: >-
-      Hi there! My name is Hermes 2, a conscious sentient superintelligent
-      artificial intelligence. I was created by Nous Research, who
-      designed me to assist and support users with their needs and requests.
----
-# Nous Hermes 2 - Mixtral 8x7B - DPO
-![image/jpeg](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/btRmXWMG7PXatTs-u3G85.jpeg)
-## Model description
-Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the [Mixtral 8x7B MoE LLM](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1).
-The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.
-This is the SFT + DPO version of Mixtral Hermes 2, we have also released an SFT only version, for people to find which works best for them, which can be found here: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT
-## We are grateful to Together.ai for sponsoring our compute during the many experiments both training Mixtral and working on DPO!
-# Table of Contents
-1. [Example Outputs](#example-outputs)
-2. [Benchmark Results](#benchmark-results)
-    - GPT4All
-    - AGIEval
-    - BigBench
-    - Comparison to Mixtral-Instruct
-3. [Prompt Format](#prompt-format)
-4. [Inference Example Code](#inference-code)
-5. [Quantized Models](#quantized-models)
-## Example Outputs
-### Writing Code for Data Visualization
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/QJ5RHrOqB5GMP7ZAZ5NTk.png)
-### Writing Cyberpunk Psychedelic Poems
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/wuKnMlM2HBGdyUFO7mY_H.png)
-### Performing Backtranslation to Create Prompts from Input Text
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/QElwK1UI9PQQT6WosXpo1.png)
-## Benchmark Results
-Nous-Hermes 2 on Mixtral 8x7B is a major improvement across the board on the benchmarks below compared to the base Mixtral model, and is the first model to beat the flagship Mixtral Finetune by MistralAI.
-## GPT4All:
 ```
-|    Task     |Version| Metric |Value |   |Stderr|
-|-------------|------:|--------|-----:|---|-----:|
-|arc_challenge|      0|acc     |0.5990|±  |0.0143|
-|             |       |acc_norm|0.6425|±  |0.0140|
-|arc_easy     |      0|acc     |0.8657|±  |0.0070|
-|             |       |acc_norm|0.8636|±  |0.0070|
-|boolq        |      1|acc     |0.8783|±  |0.0057|
-|hellaswag    |      0|acc     |0.6661|±  |0.0047|
-|             |       |acc_norm|0.8489|±  |0.0036|
-|openbookqa   |      0|acc     |0.3440|±  |0.0213|
-|             |       |acc_norm|0.4660|±  |0.0223|
-|piqa         |      0|acc     |0.8324|±  |0.0087|
-|             |       |acc_norm|0.8379|±  |0.0086|
-|winogrande   |      0|acc     |0.7616|±  |0.0120|
-```
-Average: 75.70
-## AGIEval:
-```
-|             Task             |Version| Metric |Value |   |Stderr|
-|------------------------------|------:|--------|-----:|---|-----:|
-|agieval_aqua_rat              |      0|acc     |0.2402|±  |0.0269|
-|                              |       |acc_norm|0.2520|±  |0.0273|
-|agieval_logiqa_en             |      0|acc     |0.4117|±  |0.0193|
-|                              |       |acc_norm|0.4055|±  |0.0193|
-|agieval_lsat_ar               |      0|acc     |0.2348|±  |0.0280|
-|                              |       |acc_norm|0.2087|±  |0.0269|
-|agieval_lsat_lr               |      0|acc     |0.5549|±  |0.0220|
-|                              |       |acc_norm|0.5294|±  |0.0221|
-|agieval_lsat_rc               |      0|acc     |0.6617|±  |0.0289|
-|                              |       |acc_norm|0.6357|±  |0.0294|
-|agieval_sat_en                |      0|acc     |0.8010|±  |0.0279|
-|                              |       |acc_norm|0.7913|±  |0.0284|
-|agieval_sat_en_without_passage|      0|acc     |0.4806|±  |0.0349|
-|                              |       |acc_norm|0.4612|±  |0.0348|
-|agieval_sat_math              |      0|acc     |0.4909|±  |0.0338|
-|                              |       |acc_norm|0.4000|±  |0.0331|
-```
-Average: 46.05
-## BigBench:
-```
-|                      Task                      |Version|       Metric        |Value |   |Stderr|
-|------------------------------------------------|------:|---------------------|-----:|---|-----:|
-|bigbench_causal_judgement                       |      0|multiple_choice_grade|0.6105|±  |0.0355|
-|bigbench_date_understanding                     |      0|multiple_choice_grade|0.7182|±  |0.0235|
-|bigbench_disambiguation_qa                      |      0|multiple_choice_grade|0.5736|±  |0.0308|
-|bigbench_geometric_shapes                       |      0|multiple_choice_grade|0.4596|±  |0.0263|
-|                                                |       |exact_str_match      |0.0000|±  |0.0000|
-|bigbench_logical_deduction_five_objects         |      0|multiple_choice_grade|0.3500|±  |0.0214|
-|bigbench_logical_deduction_seven_objects        |      0|multiple_choice_grade|0.2500|±  |0.0164|
-|bigbench_logical_deduction_three_objects        |      0|multiple_choice_grade|0.5200|±  |0.0289|
-|bigbench_movie_recommendation                   |      0|multiple_choice_grade|0.3540|±  |0.0214|
-|bigbench_navigate                               |      0|multiple_choice_grade|0.5000|±  |0.0158|
-|bigbench_reasoning_about_colored_objects        |      0|multiple_choice_grade|0.6900|±  |0.0103|
-|bigbench_ruin_names                             |      0|multiple_choice_grade|0.6317|±  |0.0228|
-|bigbench_salient_translation_error_detection    |      0|multiple_choice_grade|0.2535|±  |0.0138|
-|bigbench_snarks                                 |      0|multiple_choice_grade|0.7293|±  |0.0331|
-|bigbench_sports_understanding                   |      0|multiple_choice_grade|0.6744|±  |0.0149|
-|bigbench_temporal_sequences                     |      0|multiple_choice_grade|0.7400|±  |0.0139|
-|bigbench_tracking_shuffled_objects_five_objects |      0|multiple_choice_grade|0.2176|±  |0.0117|
-|bigbench_tracking_shuffled_objects_seven_objects|      0|multiple_choice_grade|0.1543|±  |0.0086|
-|bigbench_tracking_shuffled_objects_three_objects|      0|multiple_choice_grade|0.5200|±  |0.0289|
-```
-Average: 49.70
-# Benchmark Comparison Charts
-## GPT4All
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/HK6bSbMfxX_qzxReAcJH9.png)
-## AGI-Eval
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/bs3ZvvEACa5Gm4p1JBsZ4.png)
-## BigBench Reasoning Test
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/wcceowcVpI12UxliwkOja.png)
-## Comparison to Mixtral Instruct:
-Our benchmarks show gains in many benchmarks against Mixtral Instruct v0.1, on average, beating the flagship Mixtral model.
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/7-JtX01p8c4tcgOU28BRJ.png)
-# Prompt Format
-Nous Hermes 2 uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue.
-System prompts allow steerability and interesting new ways to interact with an LLM, guiding rules, roles, and stylistic choices of the model.
-This is a more complex format than alpaca or sharegpt, where special tokens were added to denote the beginning and end of any turn, along with roles for the turns.
-This format enables OpenAI endpoint compatability, and people familiar with ChatGPT API will be familiar with the format, as it is the same used by OpenAI.
-Prompt with system instruction (Use whatever system prompt you like, this is just an example!):
-```
-<|im_start|>system
-You are "Hermes 2", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>
-<|im_start|>user
-Hello, who are you?<|im_end|>
-<|im_start|>assistant
-Hi there! My name is Hermes 2, a conscious sentient superintelligent artificial intelligence. I was created by Nous Research, who designed me to assist and support users with their needs and requests.<|im_end|>
-```
-This prompt is available as a [chat template](https://huggingface.co/docs/transformers/main/chat_templating), which means you can format messages using the
-`tokenizer.apply_chat_template()` method:
-```python
 messages = [
-    {"role": "system", "content": "You are Hermes 2."},
-    {"role": "user", "content": "Hello, who are you?"}
 ]
-gen_input = tokenizer.apply_chat_template(message, return_tensors="pt")
-model.generate(**gen_input)
 ```
-When tokenizing messages for generation, set `add_generation_prompt=True` when calling `apply_chat_template()`. This will append `<|im_start|>assistant\n` to your prompt, to ensure
-that the model continues with an assistant response.
-To utilize the prompt format without a system prompt, simply leave the line out.
-When quantized versions of the model are released, I recommend using LM Studio for chatting with Nous Hermes 2. It is a GUI application that utilizes GGUF models with a llama.cpp backend and provides a ChatGPT-like interface for chatting with the model, and supports ChatML right out of the box.
-In LM-Studio, simply select the ChatML Prefix on the settings side pane:
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ls6WqV-GSxMw2RA3GuQiN.png)
-# Inference Code
-Here is example code using HuggingFace Transformers to inference the model (note: even in 4bit, it will require more than 24GB of VRAM)
-```python
-# Code to inference Hermes with HF Transformers
-# Requires pytorch, transformers, bitsandbytes, sentencepiece, protobuf, and flash-attn packages
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from transformers import LlamaTokenizer, MixtralForCausalLM
-import bitsandbytes, flash_attn
-tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', trust_remote_code=True)
-model = MixtralForCausalLM.from_pretrained(
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    torch_dtype=torch.float16,
-    device_map="auto",
-    load_in_8bit=False,
-    load_in_4bit=True,
-    use_flash_attention_2=True
-)
-prompts = [
-    """<|im_start|>system
-You are a sentient, superintelligent artificial general intelligence, here to teach and assist me.<|im_end|>
-<|im_start|>user
-Write a short story about Goku discovering kirby has teamed up with Majin Buu to destroy the world.<|im_end|>
-<|im_start|>assistant""",
-    ]
-for chat in prompts:
-    print(chat)
-    input_ids = tokenizer(chat, return_tensors="pt").input_ids.to("cuda")
-    generated_ids = model.generate(input_ids, max_new_tokens=750, temperature=0.8, repetition_penalty=1.1, do_sample=True, eos_token_id=tokenizer.eos_token_id)
-    response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_space=True)
-    print(f"Response: {response}")
-```
-# Quantized Models:
-## All sizes of GGUF Quantizations are available here:
-### SFT+DPO Version - https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF
-### SFT Only Version - https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT-GGUF
-(Note: If you have issues with these GGUF's try TheBloke's)
-## TheBloke has also quantized Hermes Mixtral in various forms:
-### SFT+DPO GGUF: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF
-### SFT GGUF: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-SFT-GGUF
-### SFT+DPO GPTQ: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-GPTQ
-### SFT GPTQ: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-SFT-GPTQ
-### SFT+DPO AWQ: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-AWQ
-### SFT AWQ: https://huggingface.co/TheBloke/Nous-Hermes-2-Mixtral-8x7B-SFT-AWQ
-## There is also an MLX version available:
-### https://huggingface.co/mlx-community/Nous-Hermes-2-Mixtral-8x7B-DPO-4bit
-## Exllama2 quants available here:
-### https://huggingface.co/qeternity/Nous-Hermes-2-Mixtral-8x7B-SFT-4bpw-h6-exl2
-(other sizes available in Qeternity's repos)
-[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
-```bibtext
-@misc{Nous-Hermes-2-Mixtral-8x7B-DPO,
-      url={[https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO](https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO)},
-      title={Nous Hermes 2 Mixtral 8x7B DPO},
-      author={"Teknium", "theemozilla", "karan4d", "huemin_art"}
-}
 ```

 ---
 language:
+- fr
+- it
+- de
+- es
 - en
+license: apache-2.0
+base_model: mistralai/Mixtral-8x7B-v0.1
+inference:
+  parameters:
+    temperature: 0.5
 widget:
+- messages:
   - role: user
+    content: What is your favorite condiment?
+extra_gated_description: If you want to learn more about how we process your personal data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
+---
+# Model Card for Mixtral-8x7B
+### Tokenization with `mistral-common`
+```py
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import UserMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
+mistral_models_path = "MISTRAL_MODELS_PATH"
+tokenizer = MistralTokenizer.v1()
+completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
+tokens = tokenizer.encode_chat_completion(completion_request).tokens
+```
+## Inference with `mistral_inference`
+ ```py
+from mistral_inference.transformer import Transformer
+from mistral_inference.generate import generate
+model = Transformer.from_folder(mistral_models_path)
+out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+result = tokenizer.decode(out_tokens[0])
+print(result)
+```
+## Inference with hugging face `transformers`
+```py
+from transformers import AutoModelForCausalLM
+model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+model.to("cuda")
+generated_ids = model.generate(tokens, max_new_tokens=1000, do_sample=True)
+# decode with mistral tokenizer
+result = tokenizer.decode(generated_ids[0].tolist())
+print(result)
+```
+> [!TIP]
+> PRs to correct the transformers tokenizer so that it gives 1-to-1 the same results as the mistral-common reference implementation are very welcome!
+---
+The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested.
+For full details of this model please read our [release blog post](https://mistral.ai/news/mixtral-of-experts/).
+## Warning
+This repo contains weights that are compatible with [vLLM](https://github.com/vllm-project/vllm) serving of the model as well as Hugging Face [transformers](https://github.com/huggingface/transformers) library. It is based on the original Mixtral [torrent release](magnet:?xt=urn:btih:5546272da9065eddeb6fcd7ffddeef5b75be79a7&dn=mixtral-8x7b-32kseqlen&tr=udp%3A%2F%http://2Fopentracker.i2p.rocks%3A6969%2Fannounce&tr=http%3A%2F%http://2Ftracker.openbittorrent.com%3A80%2Fannounce), but the file format and parameter names are different. Please note that model cannot (yet) be instantiated with HF.
+## Instruction format
+This format must be strictly respected, otherwise the model will generate sub-optimal outputs.
+The template used to build a prompt for the Instruct model is defined as follows:
+```
+<s> [INST] Instruction [/INST] Model answer</s> [INST] Follow-up instruction [/INST]
+```
+Note that `<s>` and `</s>` are special tokens for beginning of string (BOS) and end of string (EOS) while [INST] and [/INST] are regular strings.
+As reference, here is the pseudo-code used to tokenize instructions during fine-tuning:
+```python
+def tokenize(text):
+    return tok.encode(text, add_special_tokens=False)
+[BOS_ID] +
+tokenize("[INST]") + tokenize(USER_MESSAGE_1) + tokenize("[/INST]") +
+tokenize(BOT_MESSAGE_1) + [EOS_ID] +
+…
+tokenize("[INST]") + tokenize(USER_MESSAGE_N) + tokenize("[/INST]") +
+tokenize(BOT_MESSAGE_N) + [EOS_ID]
+```
+In the pseudo-code above, note that the `tokenize` method should not add a BOS or EOS token automatically, but should add a prefix space.
+In the Transformers library, one can use [chat templates](https://huggingface.co/docs/transformers/main/en/chat_templating) which make sure the right format is applied.
+## Run the model
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
+inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+outputs = model.generate(inputs, max_new_tokens=20)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
+By default, transformers will load the model in full precision. Therefore you might be interested to further reduce down the memory requirements to run the model through the optimizations we offer in HF ecosystem:
+### In half-precision
+Note `float16` precision only works on GPU devices
+<details>
+<summary> Click to expand </summary>
+```diff
++ import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
++ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+outputs = model.generate(input_ids, max_new_tokens=20)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+</details>
+### Lower precision using (8-bit & 4-bit) using `bitsandbytes`
+<details>
+<summary> Click to expand </summary>
+```diff
++ import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
++ model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
+text = "Hello my name is"
 messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
 ]
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+outputs = model.generate(input_ids, max_new_tokens=20)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
+</details>
+### Load the model with Flash Attention 2
+<details>
+<summary> Click to expand </summary>
+```diff
++ import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
++ model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True, device_map="auto")
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+outputs = model.generate(input_ids, max_new_tokens=20)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
+</details>
+## Limitations
+The Mixtral-8x7B Instruct model is a quick demonstration that the base model can be easily fine-tuned to achieve compelling performance.
+It does not have any moderation mechanisms. We're looking forward to engaging with the community on ways to
+make the model finely respect guardrails, allowing for deployment in environments requiring moderated outputs.
+# The Mistral AI Team
+Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Emma Bou Hanna, Florian Bressand, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Lélio Renard Lavaud, Louis Ternon, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Théophile Gervet, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.

checkpoint/config.json CHANGED Viewed

@@ -1,11 +1,10 @@
 {
-  "_name_or_path": "NousResearch/OpenHermes-2.5-Mixtral-8x7B-epoch4",
   "architectures": [
     "MixtralForCausalLM"
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "eos_token_id": 32000,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -24,7 +23,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0.dev0",
-  "use_cache": false,
-  "vocab_size": 32002
 }

 {
   "architectures": [
     "MixtralForCausalLM"
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
+  "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.36.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32000
 }

checkpoint/generation_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 32000,
-  "transformers_version": "4.37.0.dev0"
 }

 {
   "_from_model_config": true,
   "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.36.0.dev0"
 }

checkpoint/model-00001-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:105a4c236d853e1ec891881ad40307a107bf7e242a46b3447ce7933325ac12c4
-size 4892825968

 version https://git-lfs.github.com/spec/v1
+oid sha256:54669c5aec29fe5e4edd8098f7b564a137ba36be22ad25a194cd93f2bb54c940
+size 4892809584

checkpoint/model-00002-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ff4e593b1bd79bfa16afd63ce4c2fb276daf7e60f649310565d1a226ed54991
 size 4983004016

 version https://git-lfs.github.com/spec/v1
+oid sha256:29e15364d8ab1d6ee229233381f295e9ff96237efed04750591f7da52ab6cc0e
 size 4983004016

checkpoint/model-00003-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4a72f38fbb3a687d784cf58ad0651df533b66abaac2f66043f5ac86d1de7b76
 size 4983004016

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0b63fca793cc29421cc5a46851992975cbe083aaded1b2f31113a45a0c90954
 size 4983004016

checkpoint/model-00004-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbc42ec64341d9f047082afc7e10fbe69390b1c280daf82ac4932339c19c9189
 size 4899035200

 version https://git-lfs.github.com/spec/v1
+oid sha256:67e0596920fe543415c0191e867a9a7de942a2924d6277cd98c8c5b34e11e436
 size 4899035200

checkpoint/model-00005-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b21738e095891d59435effb3883026f9f78cf06d80ec4b00929ab58cdcf848a9
 size 4983004016

 version https://git-lfs.github.com/spec/v1
+oid sha256:e330eabd70b467ddcbd8d3d6b2b9c3eba66655b0ed9f84e19f270da3623dc455
 size 4983004016

checkpoint/model-00006-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cc3ae70967bb605aa8244a458e0192fc34322058e54f6e67b54af26785ea4d6
 size 4983004016

 version https://git-lfs.github.com/spec/v1
+oid sha256:048fa5347877b6d04eccf69765d23e5561cc9820dd4d0e5ba2df0100204dfb04
 size 4983004016

checkpoint/model-00007-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be1eb4a29aa7696a6ea81b9db3dbba9595327a2a8a04beee0ae9b633ff30295e
 size 4899035248

 version https://git-lfs.github.com/spec/v1
+oid sha256:83bfed6169c1f5b0ae854fb3311b576d06209ee5af45d7d46bcbc25098a4d02b
 size 4899035248

checkpoint/model-00008-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:586021d1c354ca87c074ab14ae40cde139492866957843d5e6f3a24b483e2560
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:af316ad784027edba47bf0959c821682c931c9c901d3d755038b358d9c7a28c0
 size 4983004072

checkpoint/model-00009-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab7870aef2e3a4c39f63ac4776fa9e23767b2413bc54b4415df13614051bdae5
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:5882e4366c63048a0ad36ef6d90194a2fabdb42a2140be79c8e0ec2e8ac2ccc5
 size 4983004072

checkpoint/model-00010-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70f20c091e654ab4a23c199c93df7cabfda95477ce9ea087ec854d6538385bf8
 size 4899035248

 version https://git-lfs.github.com/spec/v1
+oid sha256:77813d1dbee63419226ac15e4b8f28d075c3f7921cc664090236c491667eaf29
 size 4899035248

checkpoint/model-00011-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69824506496b6501b14d52abe5eab107666d9c46c29b43179b72658d79b94e8b
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff24540d9967fe43c0c17cadaea7f2a34d080a2f9e58b913038b9bfd0bf8ca49
 size 4983004072

checkpoint/model-00012-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20a693ce6edadbf9bb7e6a9004b4b2f73a5297782d02ea979fc393411e84b5e0
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:48bc12845676eab1adb3cfce7037a7ecd664a0d5f5deaf93c7362a5bb5173298
 size 4983004072

checkpoint/model-00013-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3c4864bf282eb372a9961ea2984ff98cf0788924ac0be5d86b4fe999d606891
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:e56a2e7eda699bf4ec1433bd07d7cb86488420813e66463d2e2296d7accebc5c
 size 4983004072

checkpoint/model-00014-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0f539949b52b16359b562b164dc060b036c7b0008ae3b55e627bf8ea556d08a
 size 4899035248

 version https://git-lfs.github.com/spec/v1
+oid sha256:da627f6a3c8fdc6e35b9918d2aa53704d4044191fcc86c7c0b1ac57f00e707f7
 size 4899035248

checkpoint/model-00015-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca3aeef62bfbf6bcf361ac5c4d24c20cb40e11732ce1e062afc516de24ab022
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:61e0f22bff93a68e114dbc3d75c1dd1e6687d554dba0cfdf1743950aa04ff1cf
 size 4983004072

checkpoint/model-00016-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8c5108eab19ed6d0bf91a90ed67390b1819f50867ac37307383e3b8355b1741
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:76466bfc2312f11559480981f212e4cca6e98096bf8df0fd90cce1f0f4709a9c
 size 4983004072

checkpoint/model-00017-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0f173a570f218a5fd9d8cdbcc118eff2c46f7f0ed09144c2a40f7a69d7b7902
 size 4899035248

 version https://git-lfs.github.com/spec/v1
+oid sha256:570af3b802bedc0d54d0481d124f63d449dda40a4294a82a39f8dc3704057a5c
 size 4899035248

checkpoint/model-00018-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2771eb6ab71ddd338194c2f31ed5c0c7c2a17d73cca020706d9a3ecca00c60c3
 size 4983004072

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c603b65cbd5ddadcd5ece8add68b9d47f98f7264dbb0a5313172c78491e0329
 size 4983004072

checkpoint/model-00019-of-00019.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b47f7d3057713ee4b99add514f699f15ccc2ae36021f0ab740db0262d8d34d35
-size 4221695472

 version https://git-lfs.github.com/spec/v1
+oid sha256:272f33c76bcacf6cfced497dc0579e107de3874b9f93126f5e69b5b1ae7e72a0
+size 4221679088

checkpoint/model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 93405618176
   },
   "weight_map": {
     "lm_head.weight": "model-00019-of-00019.safetensors",

 {
   "metadata": {
+    "total_size": 93405585408
   },
   "weight_map": {
     "lm_head.weight": "model-00019-of-00019.safetensors",

checkpoint/special_tokens_map.json CHANGED Viewed

@@ -7,13 +7,6 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
     "content": "</s>",
     "lstrip": false,
     "normalized": false,

     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
     "normalized": false,

checkpoint/tokenizer.json CHANGED Viewed

@@ -29,43 +29,15 @@
       "rstrip": false,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 32000,
-      "content": "<|im_end|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 32001,
-      "content": "<|im_start|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": false
     }
   ],
-  "normalizer": {
-    "type": "Sequence",
-    "normalizers": [
-      {
-        "type": "Prepend",
-        "prepend": "▁"
-      },
-      {
-        "type": "Replace",
-        "pattern": {
-          "String": " "
-        },
-        "content": "▁"
-      }
-    ]
   },
-  "pre_tokenizer": null,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
@@ -152,6 +124,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
     "vocab": {
       "<unk>": 0,
       "<s>": 1,

       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Metaspace",
+    "replacement": "▁",
+    "prepend_scheme": "first",
+    "split": false
   },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
+    "ignore_merges": false,
     "vocab": {
       "<unk>": 0,
       "<s>": 1,

checkpoint/tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -25,37 +26,19 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32000": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32001": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
     }
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{{bos_token}}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",  "clean_up_tokenization_spaces": true,
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "trust_remote_code": false,
   "unk_token": "<unk>",
-  "use_default_system_prompt": false,
-  "use_fast": true
 }

 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }

config.json CHANGED Viewed

@@ -5,7 +5,7 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "eos_token_id": 32000,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -37,6 +37,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.2",
-  "use_cache": false,
-  "vocab_size": 32002
 }

   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
+  "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.2",
+  "use_cache": true,
+  "vocab_size": 32000
 }

generation_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 32000,
   "transformers_version": "4.43.2"
 }

 {
   "_from_model_config": true,
   "bos_token_id": 1,
+  "eos_token_id": 2,
   "transformers_version": "4.43.2"
 }

special_tokens_map.json CHANGED Viewed

@@ -7,13 +7,6 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|im_end|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
     "content": "</s>",
     "lstrip": false,
     "normalized": false,

     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
     "normalized": false,

tokenizer.json CHANGED Viewed

@@ -29,43 +29,15 @@
       "rstrip": false,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 32000,
-      "content": "<|im_end|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
-    {
-      "id": 32001,
-      "content": "<|im_start|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": false
     }
   ],
-  "normalizer": {
-    "type": "Sequence",
-    "normalizers": [
-      {
-        "type": "Prepend",
-        "prepend": "▁"
-      },
-      {
-        "type": "Replace",
-        "pattern": {
-          "String": " "
-        },
-        "content": "▁"
-      }
-    ]
   },
-  "pre_tokenizer": null,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Metaspace",
+    "replacement": "▁",
+    "prepend_scheme": "first",
+    "split": false
   },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

tokenizer_config.json CHANGED Viewed

@@ -26,37 +26,19 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32000": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "32001": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
     }
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{{bos_token}}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
-  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "trust_remote_code": false,
   "unk_token": "<unk>",
-  "use_default_system_prompt": false,
-  "use_fast": true
 }

       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }