JimmyK300 committed
Commit 7681b17 · verified · 1 parent: 8ecfadb

Update app.py

Files changed (1): app.py (+20, -24)
app.py CHANGED
@@ -1,41 +1,37 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM
-import os
-os.system("pip install -U huggingface_hub")
-os.system("huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q2_k.gguf --local-dir . --local-dir-use-symlinks False")
-# Model path or name
-# ./llama-cli -m <gguf-file-path> \
-#   -co -cnv -p "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
-#   -fa -ngl 80 -n 512
-MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"
+from llama_cpp import Llama
 
-# Load the model
-model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, model_type="qwen")
+# Load the Qwen GGUF model
+MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"  # Ensure the file exists in this path
+model = Llama(model_path=MODEL_PATH)
 
+# Define the chat function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the prompt with system message and history
-    prompt = system_message + "\n"
+    # Prepare the full prompt
+    prompt = f"{system_message}\n"
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
     prompt += f"User: {message}\nAssistant:"
 
-    # Generate response
+    # Generate response using llama-cpp
     response = model(
         prompt,
-        max_new_tokens=max_tokens,
+        max_tokens=max_tokens,
         temperature=temperature,
-        top_p=top_p,
+        top_p=top_p
     )
-    return response
 
-# Define Gradio interface
+    # Extract text response
+    return response["choices"][0]["text"].strip()
+
+# Define Gradio chat interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=512, value=64, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=1.5, value=0.3, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=0.8, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
+        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
+        gr.Slider(minimum=10, maximum=512, value=100, step=10, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
 
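The updated app.py drops the startup download, so the quantized GGUF file must already exist at MODEL_PATH. If it does not, a minimal sketch of fetching it with huggingface_hub instead of the removed os.system() calls (repo_id and filename come from the old download command; gguf_path is an illustrative name):

# Sketch: download the quantized model once before constructing Llama.
# Assumes huggingface_hub is installed in the Space environment.
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
    filename="qwen2.5-0.5b-instruct-q2_k.gguf",
    local_dir=".",  # place the file where MODEL_PATH expects it
)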
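Calling a Llama instance directly runs a plain text completion and returns an OpenAI-style dict, which is why the new code indexes response["choices"][0]["text"]. A minimal sketch of that call shape, assuming the model file is in place (the prompt below is illustrative):

# Sketch: the completion dict returned by llama-cpp-python's __call__.
from llama_cpp import Llama

llm = Llama(model_path="./qwen2.5-0.5b-instruct-q2_k.gguf")
out = llm("User: Hello\nAssistant:", max_tokens=32, temperature=0.7, top_p=0.75)
print(out["choices"][0]["text"].strip())  # generated continuation only

Note that neither side of the hunk shows a demo.launch() call; a Gradio Space typically needs one at the end of app.py for the interface to start.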