gorkemgoknar committed
Commit 374ef04
1 Parent(s): 98ee681

use embedded Mistral

Files changed (1)
  1. app.py +13 -14
app.py CHANGED
@@ -139,13 +139,14 @@ print("Downloading Zephyr 7B beta")
 hf_hub_download(repo_id="TheBloke/zephyr-7B-beta-GGUF", local_dir=".", filename="zephyr-7b-beta.Q5_K_M.gguf")
 zephyr_model_path="./zephyr-7b-beta.Q5_K_M.gguf"
 
-#print("Downloading Mistral 7B Instruct")
+print("Downloading Mistral 7B Instruct")
 #Mistral
-#hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
-#mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
+hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
+mistral_model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
 
 #print("Downloading Yi-6B")
 #Yi-6B
+# Note current Yi is text-generation model not an instruct based model
 #hf_hub_download(repo_id="TheBloke/Yi-6B-GGUF", local_dir=".", filename="yi-6b.Q5_K_M.gguf")
 #yi_model_path="./yi-6b.Q5_K_M.gguf"
 
@@ -159,9 +160,10 @@ GPU_LAYERS=int(os.environ.get("GPU_LAYERS",35))
 LLM_STOP_WORDS= ["</s>","<|user|>","/s>","<EOT>","[/INST]"]
 
 LLAMA_VERBOSE=False
-print("Running LLM Mistral as InferenceClient")
-#llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
-llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
+print("Running Mistral")
+llm_mistral = Llama(model_path=mistral_model_path,n_gpu_layers=GPU_LAYERS,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
+#print("Running LLM Mistral as InferenceClient")
+#llm_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
 print("Running LLM Zephyr")
@@ -254,15 +256,12 @@ def generate_local(
 llm_model = "Yi"
 llm = llm_yi
 max_tokens= round(max_tokens/2)
-sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-
 else:
 llm_provider= "Mistral"
 llm_model = "Mistral"
 llm = llm_mistral
-sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
-sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+sys_message= system_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
+sys_system_understand_message = system_understand_message.replace("##LLM_MODEL###",llm_model).replace("##LLM_MODEL_PROVIDER###",llm_provider)
 
 if "yi" in llm_model.lower():
 formatted_prompt = format_prompt_mistral(prompt, history,system_message=sys_message,system_understand_message="")
@@ -271,8 +270,8 @@ def generate_local(
 
 try:
 print("LLM Input:", formatted_prompt)
-if llm_model=="Mistral":
-# USE Mistral endpoint
+if llm_model=="OTHER":
+# Mistral endpoint too many Queues, wait time..
 generate_kwargs = dict(
 temperature=temperature,
 max_new_tokens=max_tokens,
@@ -744,7 +743,7 @@ EXAMPLES = [
 
 ]
 
-MODELS = ["Mistral 7B Instruct","Zephyr 7B Beta"]
+MODELS = ["Zephyr 7B Beta","Mistral 7B Instruct"]
 
 OTHER_HTML=f"""<div>
 <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>