gorkemgoknar committed
Commit: 374ef04
Parent(s): 98ee681
use embedded Mistral
app.py CHANGED
@@ -139,13 +139,14 @@ print("Downloading Zephyr 7B beta")
 hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
 zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
-
+print("Downloading Mistral 7B Instruct")
 #Mistral
-
-
+hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
+mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 
 #print("Downloading Yi-6B")
 #Yi-6B
+# Note current Yi is text-generation model not an instruct based model
 #hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
 #yi_model_path="./yi-6b.Q5_K_M.gguf"
 
@@ -159,9 +160,10 @@ GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
 LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
 
 LLAMA_VERBOSE=False
-print("Running
-
-
+print("Running Mistral")
+llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+#print("Running LLM Mistral as InferenceClient")
+#llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
 print("Running LLM Zephyr")
@@ -254,15 +256,12 @@ def generate_local(
 llm_model = "Yi"
 llm = llm_yi
 max_tokens= round(max_tokens/2)
-sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-
 else:
 llm_provider= "Mistral"
 llm_model = "Mistral"
 llm = llm_mistral
-
-
+sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
 
 if "yi" in llm_model.lower():
 formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message="")
@@ -271,8 +270,8 @@ def generate_local(
 
 try:
 print("LLM Input:", formatted_prompt)
-if llm_model=="
-#
+if llm_model=="OTHER":
+# Mistral endpoint too many Queues, wait time..
 generate_kwargs = dict(
 temperature=temperature,
 max_new_tokens=max_tokens,
@@ -744,7 +743,7 @@ EXAMPLES = [
 
 ]
 
-MODELS = ["
+MODELS = ["Zephyr 7B Beta","Mistral 7B Instruct"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
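The first hunk downloads the quantized Mistral GGUF with huggingface_hub at startup, alongside the existing Zephyr download. A minimal standalone sketch of that pattern (repo id and filename are taken from the diff; everything else is illustrative):

import os
from huggingface_hub import hf_hub_download

# Fetch the quantized Mistral checkpoint into the working directory,
# mirroring the hf_hub_download call added in this commit.
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    local_dir=".",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
)
mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
assert os.path.exists(mistral_model_path)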
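The second hunk is the core of the change: instead of calling a hosted endpoint through InferenceClient, Mistral now runs in-process via llama-cpp-python's Llama class. A loader sketch, assuming llama-cpp-python is installed and restricted to commonly documented constructor arguments:

import os
from llama_cpp import Llama  # llama-cpp-python

GPU_LAYERS = int(os.environ.get("GPU_LAYERS", 35))

# Load the GGUF downloaded above and offload layers to the GPU when available.
llm_mistral = Llama(
    model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf",
    n_gpu_layers=GPU_LAYERS,  # 0 keeps inference on the CPU
    n_ctx=4096,               # context window in tokens
    n_batch=128,
    verbose=False,
)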
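The third hunk moves the sys_message lines below the else: branch, so the placeholders are filled in for whichever backend was just selected. The substitution itself is plain string replacement; a small sketch with a hypothetical template (app.py defines the real system_message outside this diff):

# Hypothetical template; the real system_message lives elsewhere in app.py.
system_message = (
    "You are a helpful voice assistant powered by ##LLM_MODEL### "
    "served via ##LLM_MODEL_PROVIDER###."
)

llm_provider = "Mistral"
llm_model = "Mistral"

# Same placeholder substitution as the relocated sys_message lines.
sys_message = (
    system_message
    .replace("##LLM_MODEL###", llm_model)
    .replace("##LLM_MODEL_PROVIDER###", llm_provider)
)
print(sys_message)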
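The fourth hunk gates the remote-endpoint path on llm_model=="OTHER" (the added comment notes the hosted Mistral endpoint's queue times) before generate_kwargs is built; the embedded path samples the local model with the same knobs. A sketch of that sampling using llama-cpp-python's streaming call, reusing llm_mistral from the loader sketch above and a hypothetical Mistral-instruct prompt (app.py builds the real one with format_prompt_mistral, which is not shown in this diff):

LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>", "<EOT>", "[/INST]"]

# Hypothetical prompt; app.py assembles the real one from history and sys_message.
formatted_prompt = "<s>[INST] Say hello in one short sentence. [/INST]"

# Stream a completion with the sampling parameters generate_kwargs carries
# (temperature, max tokens, stop words).
stream = llm_mistral(
    formatted_prompt,
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    stop=LLM_STOP_WORDS,
    stream=True,
)
text = ""
for chunk in stream:
    text += chunk["choices"][0]["text"]
print(text)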
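The last hunk adds both local backends to MODELS, which feeds the model picker in the UI. A sketch of how such a list can back a Gradio dropdown (the actual interface wiring in app.py sits outside this diff, so the component below is illustrative):

import gradio as gr

MODELS = ["Zephyr 7B Beta", "Mistral 7B Instruct"]

with gr.Blocks() as demo:
    # Illustrative dropdown; app.py defines its own layout and callbacks.
    model_choice = gr.Dropdown(choices=MODELS, value=MODELS[0], label="LLM")

demo.launch()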