gorkemgoknar committed
Commit: 374ef04
Parent(s): 98ee681
use embedded Mistral
app.py CHANGED
@@ -139,13 +139,14 @@ print("Downloading Zephyr 7B beta")
 hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
 zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
-
+print("Downloading Mistral 7B Instruct")
 #Mistral
-
-
+hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
+mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 
 #print("Downloading Yi-6B")
 #Yi-6B
+# Note current Yi is text-generation model not an instruct based model
 #hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
 #yi_model_path="./yi-6b.Q5_K_M.gguf"
 
@@ -159,9 +160,10 @@ GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
 LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
 
 LLAMA_VERBOSE=False
-print("Running
-
-
+print("Running Mistral")
+llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+#print("Running LLM Mistral as InferenceClient")
+#llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
 print("Running LLM Zephyr")
@@ -254,15 +256,12 @@ def generate_local(
 llm_model = "Yi"
 llm = llm_yi
 max_tokens= round(max_tokens/2)
-sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-
 else:
 llm_provider= "Mistral"
 llm_model = "Mistral"
 llm = llm_mistral
-
-
+sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
 
 if "yi" in llm_model.lower():
 formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message="")
@@ -271,8 +270,8 @@ def generate_local(
 
 try:
 print("LLM Input:", formatted_prompt)
-if llm_model=="
-#
+if llm_model=="OTHER":
+# Mistral endpoint too many Queues, wait time..
 generate_kwargs = dict(
 temperature=temperature,
 max_new_tokens=max_tokens,
@@ -744,7 +743,7 @@ EXAMPLES = [
 
 ]
 
-MODELS = ["
+MODELS = ["Zephyr 7B Beta","Mistral 7B Instruct"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
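The first hunk downloads the quantized Mistral GGUF with huggingface_hub at startup, alongside the existing Zephyr download. A minimal standalone sketch of that pattern (repo id and filename are taken from the diff; everything else is illustrative):

import os
from huggingface_hub import hf_hub_download

# Fetch the quantized Mistral checkpoint into the working directory,
# mirroring the hf_hub_download call added in this commit.
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    local_dir=".",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
)
mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
assert os.path.exists(mistral_model_path)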
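The second hunk is the core of the change: instead of calling a hosted endpoint through InferenceClient, Mistral now runs in-process via llama-cpp-python's Llama class. A loader sketch, assuming llama-cpp-python is installed and restricted to commonly documented constructor arguments:

import os
from llama_cpp import Llama  # llama-cpp-python

GPU_LAYERS = int(os.environ.get("GPU_LAYERS", 35))

# Load the GGUF downloaded above and offload layers to the GPU when available.
llm_mistral = Llama(
    model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf",
    n_gpu_layers=GPU_LAYERS,  # 0 keeps inference on the CPU
    n_ctx=4096,               # context window in tokens
    n_batch=128,
    verbose=False,
)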
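The third hunk moves the sys_message lines below the else: branch, so the placeholders are filled in for whichever backend was just selected. The substitution itself is plain string replacement; a small sketch with a hypothetical template (app.py defines the real system_message outside this diff):

# Hypothetical template; the real system_message lives elsewhere in app.py.
system_message = (
    "You are a helpful voice assistant powered by ##LLM_MODEL### "
    "served via ##LLM_MODEL_PROVIDER###."
)

llm_provider = "Mistral"
llm_model = "Mistral"

# Same placeholder substitution as the relocated sys_message lines.
sys_message = (
    system_message
    .replace("##LLM_MODEL###", llm_model)
    .replace("##LLM_MODEL_PROVIDER###", llm_provider)
)
print(sys_message)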
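The fourth hunk gates the remote-endpoint path on llm_model=="OTHER" (the added comment notes the hosted Mistral endpoint's queue times) before generate_kwargs is built; the embedded path samples the local model with the same knobs. A sketch of that sampling using llama-cpp-python's streaming call, reusing llm_mistral from the loader sketch above and a hypothetical Mistral-instruct prompt (app.py builds the real one with format_prompt_mistral, which is not shown in this diff):

LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>", "<EOT>", "[/INST]"]

# Hypothetical prompt; app.py assembles the real one from history and sys_message.
formatted_prompt = "<s>[INST] Say hello in one short sentence. [/INST]"

# Stream a completion with the sampling parameters generate_kwargs carries
# (temperature, max tokens, stop words).
stream = llm_mistral(
    formatted_prompt,
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    stop=LLM_STOP_WORDS,
    stream=True,
)
text = ""
for chunk in stream:
    text += chunk["choices"][0]["text"]
print(text)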
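The last hunk adds both local backends to MODELS, which feeds the model picker in the UI. A sketch of how such a list can back a Gradio dropdown (the actual interface wiring in app.py sits outside this diff, so the component below is illustrative):

import gradio as gr

MODELS = ["Zephyr 7B Beta", "Mistral 7B Instruct"]

with gr.Blocks() as demo:
    # Illustrative dropdown; app.py defines its own layout and callbacks.
    model_choice = gr.Dropdown(choices=MODELS, value=MODELS[0], label="LLM")

demo.launch()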