zhangtao committed · Commit 85d8949 · Parent(s): bb61c2b

Continue testing
Files changed:
- Dockerfile +2 -0
- app.py +9 -4
Dockerfile
CHANGED
@@ -13,4 +13,6 @@ COPY . .
 
 RUN wget https://huggingface.co/TheBloke/NeuralHermes-2.5-Mistral-7B-GGUF/resolve/main/neuralhermes-2.5-mistral-7b.Q5_K_M.gguf?download=true -O neuralhermes-2.5-mistral-7b.Q5_K_M.gguf
 
+ENV MPLCONFIGDIR /code/matplotlib/
+
 CMD ["python", "app.py"]
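An aside on the new ENV line (an observation on the diff, not part of the commit): MPLCONFIGDIR points matplotlib at a writable config/cache directory, a common workaround on Hugging Face Spaces, where the container user's home directory is often not writable. The directory should exist and be writable at runtime, otherwise matplotlib falls back to a temp dir with a warning. A minimal sketch of the effect, assuming matplotlib is installed in the image:

```python
import os

# The Dockerfile's `ENV MPLCONFIGDIR /code/matplotlib/` has the same
# process-wide effect as setting the variable before the first import.
os.environ.setdefault("MPLCONFIGDIR", "/code/matplotlib/")

import matplotlib

# matplotlib resolves its config/cache dir from MPLCONFIGDIR when set.
print(matplotlib.get_configdir())  # expected: /code/matplotlib/
```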
app.py
CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 from llama_cpp import Llama
+import json
 llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
             n_ctx=32768,
             n_threads=2,
@@ -7,18 +8,22 @@ llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
 
 
 def chat_completion(messages, history, system_prompt):
-    messages = [{"role": "system", "content": system_prompt}]
+    # messages = [{"role": "system", "content": system_prompt}]
+    messages = []
     for human, assistant in history:
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": assistant})
     messages.append({"role": "user", "content": messages})
-
+    message_str = json.dumps(messages, ensure_ascii=False, indent=4)
+    gr.Info(message_str)
+
     response = llm.create_chat_completion(
         messages=messages,
         stream=False
     )
-
-
+    response_str = json.dumps(response, ensure_ascii=False, indent=4)
+    gr.Info(response_str)
+    return message_str
     # partial_message = ""
     # for chunk in response:
     #     if len(chunk['choices'][0]['delta']) != 0:
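Two details in the committed chat_completion are worth flagging (observations on the diff above, not changes to it). The incoming user message from Gradio arrives as the first parameter, here named messages, which is then shadowed by the local messages = [] list; as a result, messages.append({"role": "user", "content": messages}) appends the list into itself, the new user turn never reaches the model, and json.dumps on the now-circular structure raises ValueError: Circular reference detected. A minimal corrected sketch, assuming the handler is wired to gr.ChatInterface with a system-prompt textbox as an additional input; the parameter rename and the final return line are illustrative assumptions, not part of the commit:

```python
import gradio as gr
from llama_cpp import Llama

llm = Llama(model_path="./neuralhermes-2.5-mistral-7b.Q5_K_M.gguf",
            n_ctx=32768,
            n_threads=2)

def chat_completion(message, history, system_prompt):
    # "message" no longer shadows the list built below.
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    # Append the new user turn itself, not the list it lives in.
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(messages=messages, stream=False)
    # Non-streaming llama-cpp-python responses follow the OpenAI-style schema.
    return response["choices"][0]["message"]["content"]
```

gr.Info() is Gradio's toast helper and works inside event handlers, so the committed debugging calls themselves are fine; returning message_str instead of the model reply, however, means the UI echoes the serialized prompt back to the user, which reads as a deliberate temporary check consistent with the "Continue testing" commit message.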