JimmyK300 committed on
Commit 473b660 · verified · 1 Parent(s): 55ac720

Update app.py

Files changed (1)
  1. app.py +37 -40
app.py CHANGED
@@ -1,52 +1,49 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-
+
+# Model name
 MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
-
+
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    inputs = tokenizer(message, return_tensors="pt").to("cpu")
+
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+    messages = [{"role": "system", "content": system_message}]
+
+    # Add chat history to messages
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+
+    messages.append({"role": "user", "content": message})
+
+    # Tokenize input
+    inputs = tokenizer(message, return_tensors="pt").to("cpu")
+
+    # Generate response
     with torch.no_grad():
-        outputs = model.generate(**inputs, max_length=max_tokens, temperature=temperature, top_p=top_p)
+        outputs = model.generate(
+            **inputs, max_length=max_tokens, temperature=temperature, top_p=top_p
+        )
+
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
     return response
-
+
+# Define Gradio interface
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=512, value=64, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=1.5, value=0.3, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=0.8,
-            value=0.75,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=512, value=64, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.3, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=0.8, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
 )
-
+
+# Launch Gradio app
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
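
Note: even after this commit, respond builds the full messages list but then tokenizes only the latest message, pins the inputs to "cpu" while the model is loaded with device_map="auto", and passes the "Max new tokens" slider value as max_length (a cap on total sequence length) rather than max_new_tokens. Below is a minimal sketch of how the function could actually use the assembled history; it relies on the standard Transformers apply_chat_template and generate APIs, but the variant itself is illustrative and not part of the committed code.

def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    # Illustrative sketch only, not part of this commit.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Render the whole conversation with the model's chat template instead of
    # tokenizing only the latest user message.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Keep the inputs on whatever device device_map="auto" placed the model, not "cpu".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,  # matches the "Max new tokens" slider semantics
            do_sample=True,             # temperature/top_p only take effect when sampling
            temperature=temperature,
            top_p=top_p,
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)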