Commit 28f7799 by gorkemgoknar (1 parent: 87be2eb)

    add Yi-6B-200K

app.py CHANGED
@@ -106,7 +106,7 @@ system_message = system_message.replace("CURRENT_DATE", str(datetime.date.today(
 
 # MISTRAL ONLY
 default_system_understand_message = (
-    "I understand, I am a
+    "I understand, I am a ##LLM_MODEL### chatbot with speech by Coqui team."
 )
 system_understand_message = os.environ.get(
     "SYSTEM_UNDERSTAND_MESSAGE", default_system_understand_message
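The hunk keeps a ##LLM_MODEL### placeholder in the default understand-message so one template can serve all of the models; the placeholder is filled per request (see the generate_local hunk further down). A minimal, self-contained sketch of that override-and-substitute flow, using only names that appear in the diff:

```python
import os

# Default keeps the ##LLM_MODEL### placeholder; a deployment can override
# the whole message via the SYSTEM_UNDERSTAND_MESSAGE environment variable.
default_system_understand_message = (
    "I understand, I am a ##LLM_MODEL### chatbot with speech by Coqui team."
)
system_understand_message = os.environ.get(
    "SYSTEM_UNDERSTAND_MESSAGE", default_system_understand_message
)

# At request time the placeholder is replaced with the selected model name:
print(system_understand_message.replace("##LLM_MODEL###", "Yi"))
# -> I understand, I am a Yi chatbot with speech by Coqui team.
```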
@@ -132,24 +132,26 @@ ROLE_PROMPTS["AI Beard The Pirate"]= pirate_system_message
 
 
 
-### WILL USE LOCAL MISTRAL OR ZEPHYR
+### WILL USE LOCAL MISTRAL OR ZEPHYR OR YI
+### While zephyr and yi will use half GPU to fit all into 16GB, XTTS will use at most 5GB VRAM
 
 from huggingface_hub import hf_hub_download
 print("Downloading LLM")
-
-
-print("Downloading Zephyr")
+print("Downloading Zephyr 7B beta")
 #Zephyr
 hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
-# use new gguf format
 zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
-print("Downloading Mistral")
+print("Downloading Mistral 7B Instruct")
 #Mistral
 hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
-# use new gguf format
 mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 
+print("Downloading Yi-6B-200k")
+#Yi-6B-200K
+hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b-200k.Q5_K_M.gguf")
+yi_model_path="./yi-6b-200k.Q5_K_M.gguf"
+
 
 from llama_cpp import Llama
 # set GPU_LAYERS to 15 if you have a 8GB GPU so both models can fit in
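Each model is fetched as a single Q5_K_M GGUF file with hf_hub_download. A compact sketch of the same three downloads (the GGUF_FILES grouping is mine, not in the diff; the repo_id/filename pairs are copied from the hunk). Note that the Yi download pairs the repo TheBloke/Yi-6B-GGUF with the 200K filename, while the About text below links TheBloke/Yi-6B-200K-GGUF; if that filename does not exist in the non-200K repo, the download would fail at startup:

```python
from huggingface_hub import hf_hub_download

# (repo_id, filename) pairs copied verbatim from the hunk above.
GGUF_FILES = [
    ("TheBloke/zephyr-7B-beta-GGUF", "zephyr-7b-beta.Q5_K_M.gguf"),
    ("TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q5_K_M.gguf"),
    ("TheBloke/Yi-6B-GGUF", "yi-6b-200k.Q5_K_M.gguf"),  # 200K filename, non-200K repo
]

for repo_id, filename in GGUF_FILES:
    # Download into the working directory, as the Space does with local_dir=".".
    path = hf_hub_download(repo_id=repo_id, local_dir=".", filename=filename)
    print("downloaded", path)
```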
@@ -164,7 +166,10 @@ print("Running LLM Mistral")
 llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 print("Running LLM Zephyr")
-llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-
+llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-5,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+
+print("Running Yi LLM")
+llm_zephyr = Llama(model_path=yi_model_path,n_gpu_layers=GPU_LAYERS-5,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
 
 
 # Mistral formatter
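Mistral keeps the full GPU_LAYERS offload while Zephyr and Yi get GPU_LAYERS-5, matching the "half GPU" comment above. Note that both calls in the hunk assign to llm_zephyr, so as written the Yi model overwrites the Zephyr handle. A sketch that keeps a separate handle instead (llm_yi is a hypothetical name, not in the diff; paths and constants are taken from the diff):

```python
from llama_cpp import Llama

GPU_LAYERS = 15        # the diff's comment suggests 15 for an 8GB GPU
LLAMA_VERBOSE = False
zephyr_model_path = "./zephyr-7b-beta.Q5_K_M.gguf"
yi_model_path = "./yi-6b-200k.Q5_K_M.gguf"

# Offload five fewer layers for these models so they share the GPU with XTTS.
llm_zephyr = Llama(model_path=zephyr_model_path, n_gpu_layers=GPU_LAYERS - 5,
                   n_ctx=4096, n_batch=128, verbose=LLAMA_VERBOSE)

# The diff reuses the llm_zephyr name here; a distinct handle keeps both usable.
llm_yi = Llama(model_path=yi_model_path, n_gpu_layers=GPU_LAYERS - 5,
               n_ctx=4096, n_batch=128, verbose=LLAMA_VERBOSE)
```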
@@ -230,8 +235,15 @@ def generate_local(
         formatted_prompt = format_prompt_zephyr(prompt, history,system_message=sys_message)
         llm = llm_zephyr
     else:
-
-
+        if "yi" in llm_model.lower():
+            llm_provider= "01.ai"
+            llm_model = "Yi"
+        else:
+            llm_provider= "Mistral"
+            llm_model = "Mistral"
+        sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+        formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message=sys_system_understand_message)
         llm = llm_mistral
 
 
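Yi and Mistral both flow through the Mistral prompt formatter; the new branch only normalizes the model name and provider before filling the template placeholders. A standalone sketch of that mapping (resolve_model is a hypothetical helper name, the logic mirrors the hunk):

```python
def resolve_model(llm_model: str) -> tuple[str, str]:
    # Mirrors the branch added in generate_local: any selection whose name
    # contains "yi" is treated as Yi from 01.ai, everything else as Mistral.
    if "yi" in llm_model.lower():
        return "01.ai", "Yi"
    return "Mistral", "Mistral"

system_message = "You are ##LLM_MODEL### by ##LLM_MODEL_PROVIDER###."
provider, name = resolve_model("Yi 6B")
print(system_message
      .replace("##LLM_MODEL###", name)
      .replace("##LLM_MODEL_PROVIDER###", provider))
# -> You are Yi by 01.ai.
```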
@@ -680,9 +692,11 @@ EXAMPLES = [
     [[],"AI Assistant","Speak in French, tell me how are you doing?"],
     [[],"AI Assistant","Antworten Sie mir von nun an auf Deutsch"],
     [[],"AI Beard The Pirate","Who are you?"],
+    [[],"AI Beard The Pirate","告诉我你的冒险经历"],
+
 ]
 
-MODELS = ["Mistral","Zephyr"]
+MODELS = ["Mistral 7B Instruct","Zephyr 7B Beta","Yi 6B"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
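The new example prompt 告诉我你的冒险经历 ("Tell me about your adventures") joins the existing French and German rows, presumably to exercise the new Yi model's Chinese support. The new dropdown value "Yi 6B" is also what later hits the `"yi" in llm_model.lower()` branch in generate_local.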
@@ -699,7 +713,7 @@ with gr.Blocks(title=title) as demo:
     with gr.Row():
         model_selected = gr.Dropdown(
             label="Select Instruct LLM Model to Use",
-            info="Zephyr
+            info="Mistral, Zephyr, Yi : 5-bit GGUF models are preloaded",
             choices=MODELS,
             max_choices=1,
             value=MODELS[0],
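For context, the model selector in isolation: a minimal runnable sketch, with the Space's surrounding layout and the wiring into generate_local omitted (those are assumptions about the rest of app.py, which the diff does not show):

```python
import gradio as gr

MODELS = ["Mistral 7B Instruct", "Zephyr 7B Beta", "Yi 6B"]

with gr.Blocks() as demo:
    with gr.Row():
        # Single-select dropdown; its value string is what the dispatch
        # logic in generate_local inspects.
        model_selected = gr.Dropdown(
            label="Select Instruct LLM Model to Use",
            info="Mistral, Zephyr, Yi : 5-bit GGUF models are preloaded",
            choices=MODELS,
            max_choices=1,
            value=MODELS[0],
        )

demo.launch()
```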
@@ -789,7 +803,8 @@ It relies on following models :
 Speech to Text : [Whisper-large-v2](https://sanchit-gandhi-whisper-large-v2.hf.space/) as an ASR model, to transcribe recorded audio to text. It is called through a [gradio client](https://www.gradio.app/docs/client).
 LLM Mistral : [Mistral-7b-instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) as the chat model, GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF).
 LLM Zephyr : [Zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF).
-
+LLM Yi : [Yi-6B-200K](https://huggingface.co/01-ai/Yi-6B-200K) as the chat model. GGUF Q5_K_M quantized version used locally via llama_cpp from [huggingface.co/TheBloke](https://huggingface.co/TheBloke/Yi-6B-200K-GGUF).
+Text to Speech : [Coqui's XTTS V2](https://huggingface.co/spaces/coqui/xtts) as a Multilingual TTS model, to generate the chatbot answers. This time, the model is hosted locally.
 
 Note:
 - By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml