jasonfang3900 committed
Commit ffb2ba9 · verified · 1 Parent(s): c2c87e7

Create app.py

Files changed (1)
  app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ import gradio as gr
+ import requests
+ import os
+
+ # Remote backend that streams chat completions back as plain text.
+ url = "http://59.110.170.104:8084/chat_completion"
+
+
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     do_sample: bool,
+     seed: int,
+     max_new_tokens,
+     temperature,
+     top_p,
+     top_k,
+     repetition_penalty
+ ):
+     # Rebuild the conversation history as role/content messages.
+     messages = []
+
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+     request_data = dict(
+         messages=messages,
+         max_new_tokens=max_new_tokens,
+         do_sample=do_sample,
+         seed=seed,
+         top_p=top_p,
+         top_k=top_k,
+         temperature=temperature,
+         repetition_penalty=repetition_penalty
+     )
+     print(request_data)
+     with requests.post(url, json=request_data, stream=True, headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}) as r:
+         # Accumulate streamed chunks and yield the growing reply so the chat UI updates incrementally.
+         for chunk in r.iter_content(1024):
+             response += chunk.decode("utf-8")
+             yield response
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+
+ demo = gr.ChatInterface(
+     respond,
+     chatbot=gr.Chatbot(height=600),
+     additional_inputs=[
+         # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Checkbox(True, label="do sample"),
+         gr.Number(42, precision=0, label="seed"),
+         gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.01, maximum=4.0, value=0.7, step=0.01, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=1.0,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+         gr.Slider(
+             minimum=0,
+             maximum=100,
+             value=0,
+             step=1,
+             label="Top-K (Top-K sampling)",
+         ),
+         gr.Slider(
+             minimum=1,
+             maximum=2,
+             value=1.03,
+             step=0.01,
+             label="Repetition penalty",
+         ),
+     ],
+ )
+
+
+ if __name__ == "__main__":
+     demo.queue(default_concurrency_limit=2, max_size=10)
+     demo.launch()
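
The backend behind http://59.110.170.104:8084/chat_completion is not part of this commit; app.py only assumes it accepts the JSON payload built in respond() (the message list plus the sampling parameters), checks a Bearer HF_TOKEN header, and streams the reply back as plain UTF-8 text. A minimal sketch of a compatible server, assuming a FastAPI app, could look like the following; all names and behaviour here are illustrative assumptions, not the actual service.

# Hypothetical backend sketch -- NOT the real service used by app.py.
# Assumes: FastAPI, a JSON body matching request_data in respond(),
# and a plain-text streaming response. Token auth and real inference are stubbed out.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()


class ChatRequest(BaseModel):
    messages: list[dict]
    max_new_tokens: int
    do_sample: bool
    seed: int
    top_p: float
    top_k: int
    temperature: float
    repetition_penalty: float


@app.post("/chat_completion")
def chat_completion(req: ChatRequest):
    def generate():
        # Stand-in for real model inference: echo the last user message word by word.
        for word in ("echo: " + req.messages[-1]["content"]).split():
            yield word + " "

    # app.py reads this with requests' iter_content(), so stream plain UTF-8 text.
    return StreamingResponse(generate(), media_type="text/plain")

On the client side, app.py expects the HF_TOKEN environment variable to be set before launch (it is sent as a Bearer token with every request) and can then be started with python app.py.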