zhangtao committed · Commit 85d8949 · Parent(s): bb61c2b

Continue testing
Files changed:
- Dockerfile +2 -0
- app.py +9 -4
Dockerfile
CHANGED
@@ -13,4 +13,6 @@ COPY . .
 
 RUN wget https://huggingface.co/TheBloke/NeuralHermes-2.5-Mistral-7B-GGUF/resolve/main/neuralhermes-2.5-mistral-7b.Q5_K_M.gguf?download=true -O neuralhermes-2.5-mistral-7b.Q5_K_M.gguf
 
+ENV MPLCONFIGDIR /code/matplotlib/
+
 CMD ["python", "app.py"]
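An aside on the new ENV line (an observation on the diff, not part of the commit): MPLCONFIGDIR points matplotlib at a writable config/cache directory, a common workaround on Hugging Face Spaces, where the container user's home directory is often not writable. The directory should exist and be writable at runtime, otherwise matplotlib falls back to a temp dir with a warning. A minimal sketch of the effect, assuming matplotlib is installed in the image:

```python
import os

# The Dockerfile's `ENV MPLCONFIGDIR /code/matplotlib/` has the same
# process-wide effect as setting the variable before the first import.
os.environ.setdefault("MPLCONFIGDIR", "/code/matplotlib/")

import matplotlib

# matplotlib resolves its config/cache dir from MPLCONFIGDIR when set.
print(matplotlib.get_configdir())  # expected: /code/matplotlib/
```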
app.py
CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 from llama_cpp import Llama
+import json
 llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
             n_ctx=32768,
             n_threads=2,
@@ -7,18 +8,22 @@ llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
 
 
 def chat_completion(messages, history, system_prompt):
-    messages = [{"role": "system", "content": system_prompt}]
+    # messages = [{"role": "system", "content": system_prompt}]
+    messages = []
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": messages})
-
+    message_str = json.dumps(messages, ensure_ascii=False, indent=4)
+    gr.Info(message_str)
+
     response = llm.create_chat_completion(
         messages=messages,
         stream=False
     )
-
-
+    response_str = json.dumps(response, ensure_ascii=False, indent=4)
+    gr.Info(response_str)
+    return message_str
     # partial_message = ""
     # for chunk in response:
     #     if len(chunk['choices'][0]['delta']) != 0:
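Two details in the committed chat_completion are worth flagging (observations on the diff above, not changes to it). The incoming user message from Gradio arrives as the first parameter, here named messages, which is then shadowed by the local messages = [] list; as a result, messages.append({"role": "user", "content": messages}) appends the list into itself, the new user turn never reaches the model, and json.dumps on the now-circular structure raises ValueError: Circular reference detected. A minimal corrected sketch, assuming the handler is wired to gr.ChatInterface with a system-prompt textbox as an additional input; the parameter rename and the final return line are illustrative assumptions, not part of the commit:

```python
import gradio as gr
from llama_cpp import Llama

llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
            n_ctx=32768,
            n_threads=2)

def chat_completion(message, history, system_prompt):
    # "message" no longer shadows the list built below.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    # Append the new user turn itself, not the list it lives in.
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(messages=messages, stream=False)
    # Non-streaming llama-cpp-python responses follow the OpenAI-style schema.
    return response["choices"][0]["message"]["content"]
```

gr.Info() is Gradio's toast helper and works inside event handlers, so the committed debugging calls themselves are fine; returning message_str instead of the model reply, however, means the UI echoes the serialized prompt back to the user, which reads as a deliberate temporary check consistent with the "Continue testing" commit message.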