JimmyK300 committed on
Commit 55ac720 · verified · 1 Parent(s): 6611552

Update app.py

Files changed (1):
  1. app.py +41 -40
app.py CHANGED
@@ -1,51 +1,52 @@
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
-
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
-
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, fast_tokenizer=True)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, deivce_map="auto", torch_dtype=torch.float16)
-
  def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
  ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     inputs = tokenizer(message, return_tensors="pt").to("cpu")
-     outputs = model.generate(**inputs, max_length=max_tokens, temperature=temperature, top_p=top_p)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
      return response
-
  demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=512, value=128, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=1.5, value=0.3, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=0.8,
-             value=0.75,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
  )
-
  if __name__ == "__main__":
-     demo.launch()
 
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
+
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+
  def respond(
+     message,
+     history: list[tuple[str, str]],
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
  ):
+     messages = [{"role": "system", "content": system_message}]
+
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+
+     messages.append({"role": "user", "content": message})
+
+     inputs = tokenizer(message, return_tensors="pt").to("cpu")
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_length=max_tokens, temperature=temperature, top_p=top_p)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
      return response
+
  demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=512, value=64, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=1.5, value=0.3, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=0.8,
+             value=0.75,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],
  )
+
  if __name__ == "__main__":
+     demo.launch()
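Even after this commit, respond() builds a messages list that it never uses: only the raw message string is tokenized, so the system prompt and chat history never reach the model. max_length=max_tokens also caps prompt plus completion together (the "Max new tokens" slider label suggests max_new_tokens was intended), and generate() silently ignores temperature and top_p unless do_sample=True. A minimal sketch of a follow-up fix, not part of this commit, assuming the same tokenizer and model globals and that the tokenizer ships a chat template (Qwen2.5-Instruct's does):

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build the conversation, including the system prompt and prior turns.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Render with the model's chat template instead of tokenizing the bare message.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,  # budget for generated tokens only
            do_sample=True,             # required for temperature/top_p to take effect
            temperature=temperature,
            top_p=top_p,
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Sending inputs to model.device instead of "cpu" keeps them on whatever device device_map="auto" placed the weights; the rest is the commit's own logic, reorganized.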