Update app.py
app.py CHANGED
@@ -1,41 +1,37 @@
 import gradio as gr
-from …
-import os
-os.system("pip install -U huggingface_hub")
-os.system("huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q2_k.gguf --local-dir . --local-dir-use-symlinks False")
-# Model path or name
-# ./llama-cli -m <gguf-file-path> \
-# -co -cnv -p "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
-# -fa -ngl 80 -n 512
-MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"
+from llama_cpp import Llama
 
-# Load the model
-
+# Load the Qwen GGUF model
+MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"  # Ensure the file exists in this path
+model = Llama(model_path=MODEL_PATH)
 
+# Define the chat function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the prompt
-    prompt = system_message
+    # Prepare the full prompt
+    prompt = f"{system_message}\n"
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
     prompt += f"User: {message}\nAssistant:"
 
-    # Generate response
+    # Generate response using llama-cpp
     response = model(
-        prompt,
-        …
-        temperature=temperature,
-        top_p=top_p
+        prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p
     )
-    return response
 
-# …
+    # Extract text response
+    return response["choices"][0]["text"].strip()
+
+# Define Gradio chat interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a…
-        gr.Slider(minimum=…
-        gr.Slider(minimum=0.1, maximum=1.5, value=0.…
-        gr.Slider(minimum=0.1, maximum=0…
+        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
+        gr.Slider(minimum=10, maximum=512, value=100, step=10, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
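Note that the update removes the runtime download via os.system, so ./qwen2.5-0.5b-instruct-q2_k.gguf must already exist when Llama(model_path=...) runs. If the file still needs to be fetched at startup, a minimal sketch using huggingface_hub (repo id and filename taken from the removed download command; assumes huggingface_hub is listed in the Space's requirements):

# Sketch: fetch the GGUF in-process instead of shelling out to huggingface-cli.
# Repo id and filename come from the removed os.system download line.
from huggingface_hub import hf_hub_download

model_file = hf_hub_download(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="qwen2.5-0.5b-instruct-q2_k.gguf",
    local_dir=".",  # lands where MODEL_PATH expects the file
)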
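The new return line indexes into the completion object: calling a llama-cpp-python Llama instance returns an OpenAI-style completion dict. An abridged sketch of its shape (field values are illustrative, not real output):

# Abridged shape of the dict returned by model(prompt, ...); respond() only
# uses choices[0]["text"].
response = {
    "id": "cmpl-...",
    "choices": [
        {"index": 0, "text": " Hi! How can I help?", "finish_reason": "stop"}
    ],
    "usage": {"prompt_tokens": 42, "completion_tokens": 8, "total_tokens": 50},
}
assert response["choices"][0]["text"].strip() == "Hi! How can I help?"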
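Qwen2.5-Instruct is a chat-tuned model, and the hand-rolled "User:/Assistant:" prompt bypasses its ChatML template. llama-cpp-python also exposes create_chat_completion, which applies the chat template embedded in the GGUF; a sketch of the same handler in that style (respond_chat is a hypothetical name, not part of this diff):

from llama_cpp import Llama

def respond_chat(model: Llama, message, history, system_message,
                 max_tokens, temperature, top_p):
    # Build OpenAI-style messages; create_chat_completion applies the
    # chat template stored in the GGUF (ChatML for Qwen2.5).
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    result = model.create_chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return result["choices"][0]["message"]["content"]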
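The hunk covers the whole file (41 lines before, 37 after) and never calls demo.launch(), so the interface as committed is built but not started. A Gradio Space typically closes app.py with the standard entry point below (an assumption about intent, since the diff does not show it):

# Standard Gradio entry point; not present in the diff, so the UI would not
# start without this (or an equivalent demo.launch() call).
if __name__ == "__main__":
    demo.launch()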