Update app.py
app.py CHANGED
@@ -1,41 +1,37 @@
 import gradio as gr
-from …
-import os
-os.system("pip install -U huggingface_hub")
-os.system("huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q2_k.gguf --local-dir . --local-dir-use-symlinks False")
-# Model path or name
-# ./llama-cli -m <gguf-file-path> \
-# -co -cnv -p "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
-# -fa -ngl 80 -n 512
-MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"
+from llama_cpp import Llama
 
-# Load the model
-
+# Load the Qwen GGUF model
+MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"  # Ensure the file exists in this path
+model = Llama(model_path=MODEL_PATH)
 
+# Define the chat function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the prompt
-    prompt = system_message
+    # Prepare the full prompt
+    prompt = f"{system_message}\n"
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
     prompt += f"User: {message}\nAssistant:"
 
-    # Generate response
+    # Generate response using llama-cpp
     response = model(
-        prompt,
-        …
-        temperature=temperature,
-        top_p=top_p
+        prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p
     )
-    return response
 
-# …
+    # Extract text response
+    return response["choices"][0]["text"].strip()
+
+# Define Gradio chat interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a…
-        gr.Slider(minimum=…
-        gr.Slider(minimum=0.1, maximum=1.5, value=0.…
-        gr.Slider(minimum=0.1, maximum=0…
+        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
+        gr.Slider(minimum=10, maximum=512, value=100, step=10, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
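Note that the update removes the runtime download via os.system, so ./qwen2.5-0.5b-instruct-q2_k.gguf must already exist when Llama(model_path=...) runs. If the file still needs to be fetched at startup, a minimal sketch using huggingface_hub (repo id and filename taken from the removed download command; assumes huggingface_hub is listed in the Space's requirements):

# Sketch: fetch the GGUF in-process instead of shelling out to huggingface-cli.
# Repo id and filename come from the removed os.system download line.
from huggingface_hub import hf_hub_download

model_file = hf_hub_download(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="qwen2.5-0.5b-instruct-q2_k.gguf",
    local_dir=".",  # lands where MODEL_PATH expects the file
)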
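The new return line indexes into the completion object: calling a llama-cpp-python Llama instance returns an OpenAI-style completion dict. An abridged sketch of its shape (field values are illustrative, not real output):

# Abridged shape of the dict returned by model(prompt, ...); respond() only
# uses choices[0]["text"].
response = {
    "id": "cmpl-...",
    "choices": [
        {"index": 0, "text": " Hi! How can I help?", "finish_reason": "stop"}
    ],
    "usage": {"prompt_tokens": 42, "completion_tokens": 8, "total_tokens": 50},
}
assert response["choices"][0]["text"].strip() == "Hi! How can I help?"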
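Qwen2.5-Instruct is a chat-tuned model, and the hand-rolled "User:/Assistant:" prompt bypasses its ChatML template. llama-cpp-python also exposes create_chat_completion, which applies the chat template embedded in the GGUF; a sketch of the same handler in that style (respond_chat is a hypothetical name, not part of this diff):

from llama_cpp import Llama

def respond_chat(model: Llama, message, history, system_message,
                 max_tokens, temperature, top_p):
    # Build OpenAI-style messages; create_chat_completion applies the
    # chat template stored in the GGUF (ChatML for Qwen2.5).
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    result = model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return result["choices"][0]["message"]["content"]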
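The hunk covers the whole file (41 lines before, 37 after) and never calls demo.launch(), so the interface as committed is built but not started. A Gradio Space typically closes app.py with the standard entry point below (an assumption about intent, since the diff does not show it):

# Standard Gradio entry point; not present in the diff, so the UI would not
# start without this (or an equivalent demo.launch() call).
if __name__ == "__main__":
    demo.launch()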