adlsdztony committed
Commit fe2da2c · 1 Parent(s): 507970e

Update app.py: swap the model to Rui-3B, rework the response-generation logic, and change the system message to Chinese

Files changed (1)
app.py: +44 -17
app.py CHANGED
@@ -1,10 +1,19 @@
+
+from peft import AutoPeftModelForCausalLM
+from transformers import AutoTokenizer, TextIteratorStreamer
+from threading import Thread
 import gradio as gr
-from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("adlsdztony/Rui-0.5B")
+model = AutoPeftModelForCausalLM.from_pretrained("adlsdztony/Rui-3B")
+tokenizer = AutoTokenizer.from_pretrained("adlsdztony/Rui-3B")
+
+
+# from huggingface_hub import InferenceClient
+
+# """
+# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+# """
+# client = InferenceClient("adlsdztony/Rui-3B")
 
 
 def respond(
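The switch from the hosted InferenceClient to a local model assumes adlsdztony/Rui-3B is a PEFT adapter repository: AutoPeftModelForCausalLM reads the adapter config, downloads the base model it references, and attaches the adapter weights on top. A minimal sketch of that loading path; the eval() and merge_and_unload() calls are optional extras not taken by the commit:

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Fetches the base model named in the adapter's config, then attaches the
# adapter (e.g. LoRA) weights from the repo on top of it.
model = AutoPeftModelForCausalLM.from_pretrained("adlsdztony/Rui-3B")
tokenizer = AutoTokenizer.from_pretrained("adlsdztony/Rui-3B")

model.eval()  # inference mode: disables dropout

# Optional, not done in the commit: fold the adapter into the base weights
# so generation skips the adapter indirection.
model = model.merge_and_unload()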
@@ -25,19 +34,37 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    inputs = tokenizer([prompt], return_tensors='pt', padding=True, truncation=True)
+
+    streamer = TextIteratorStreamer(tokenizer)
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_tokens, do_sample=True, temperature=temperature, top_p=top_p)
+
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    generate_text = ''
+    for new_text in streamer:
+        output = new_text.replace(prompt, '')
+        if output:
+            generate_text += output
+            yield generate_text
+
+    # response = ""
+
+    # for message in client.chat_completion(
+    #     messages,
+    #     max_tokens=max_tokens,
+    #     stream=True,
+    #     temperature=temperature,
+    #     top_p=top_p,
+    # ):
+    #     token = message.choices[0].delta.content
+
+    #     response += token
+    #     yield response
 
 
 """
@@ -46,7 +73,7 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Textbox(value="你是小锐,你只会说中文,你会自称为‘锐’,你的工作是每天告诉同学明天的天气和一些最近发生的事情,最后你会跟同学说晚安", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
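The new default system message replaces the English "You are a friendly Chatbot." and translates roughly as: "You are Xiao Rui. You only speak Chinese and call yourself 'Rui'. Your job each day is to tell classmates tomorrow's weather and some recent events, and you end by wishing them good night."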
 