lavanjv and s3nh committed
Commit
e2313cc
0 Parent(s):

Duplicate from s3nh/GOAT-7B-COMMUNITY-CHAT


Co-authored-by: s3nh <[email protected]>

Files changed (4)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +396 -0
  4. requirements.txt +8 -0
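
Note: this commit only mirrors the upstream Space. For reference, the same duplication can be scripted; a minimal sketch, assuming `huggingface_hub` is installed and an HF token with write access is configured (neither is part of this commit):

```python
# Hypothetical one-off script; not part of this commit.
# Requires: pip install huggingface_hub, plus an HF token with write access.
from huggingface_hub import duplicate_space

# Creates <your-username>/GOAT-7B-COMMUNITY-CHAT from the upstream Space.
repo_url = duplicate_space("s3nh/GOAT-7B-COMMUNITY-CHAT")
print(repo_url)
```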
.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
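
The patterns above route large binary artifacts (model weights, archives, serialized tensors) through Git LFS so the repository itself stays small. A rough sketch of checking a path against these globs, assuming only the standard library and an abridged pattern list (`fnmatch` approximates, but does not exactly replicate, gitattributes matching such as `saved_model/**/*`):

```python
# Rough check of whether a filename would be routed through Git LFS.
# PATTERNS is abridged from the .gitattributes above.
from fnmatch import fnmatch

PATTERNS = ["*.bin", "*.safetensors", "*.gz", "*.zip"]  # abridged

def tracked_by_lfs(path: str) -> bool:
    return any(fnmatch(path, pattern) for pattern in PATTERNS)

print(tracked_by_lfs("GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"))  # True: *.bin
```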
README.md ADDED
@@ -0,0 +1,14 @@
+---
+title: GOAT 7B COMMUNITY CHAT
+emoji: 🚀
+colorFrom: green
+colorTo: gray
+sdk: gradio
+sdk_version: 3.38.0
+app_file: app.py
+pinned: false
+license: openrail
+duplicated_from: s3nh/GOAT-7B-COMMUNITY-CHAT
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,396 @@
+
+import os
+import platform
+import random
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+import gradio as gr
+import psutil
+from about_time import about_time
+from ctransformers import AutoModelForCausalLM
+from dl_hf_model import dl_hf_model
+from loguru import logger
+
+
+URL = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"  # 4.05G
+
+_ = (
+    "golay" in platform.node()
+    or "okteto" in platform.node()
+    or Path("/kaggle").exists()
+    # or psutil.cpu_count(logical=False) < 4
+    or 1  # run 7b in hf
+)
+
+if _:
+    url = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"  # 4.05G, same file as URL
+
+
+prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction: {user_prompt}
+
+### Response:
+"""
+
+prompt_template = """System: You are a helpful,
+respectful and honest assistant. Always answer as
+helpfully as possible, while being safe. Your answers
+should not include any harmful, unethical, racist,
+sexist, toxic, dangerous, or illegal content. Please
+ensure that your responses are socially unbiased and
+positive in nature. If a question does not make any
+sense, or is not factually coherent, explain why instead
+of answering something not correct. If you don't know
+the answer to a question, please don't share false
+information.
+User: {prompt}
+Assistant: """
+
+prompt_template = """System: You are a helpful assistant.
+User: {prompt}
+Assistant: """
+
+prompt_template = """Question: {question}
+Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+<</SYS>>
+
+What NFL team won the Super Bowl in the year Justin Bieber was born?
+[/INST]"""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+{question} [/INST]
+"""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful assistant.
+<</SYS>>
+
+{question} [/INST]
+"""
+
+# the last assignment wins: this is the template actually used below
+prompt_template = """### HUMAN:
+{question}
+
+### RESPONSE:"""
+
+_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+logger.debug(f"{stop_string=} not used")
+
+_ = psutil.cpu_count(logical=False) - 1
+cpu_count: int = int(_) if _ else 1
+logger.debug(f"{cpu_count=}")
+
+LLM = None
+
+try:
+    model_loc, file_size = dl_hf_model(url)
+except Exception as exc_:
+    logger.error(exc_)
+    raise SystemExit(1) from exc_
+
+LLM = AutoModelForCausalLM.from_pretrained(
+    model_loc,
+    model_type="llama",
+)
+
+logger.info(f"done load llm {model_loc=} {file_size=}G")
+
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()
+except Exception:
+    # time.tzset() is POSIX-only; warn instead of crashing on Windows
+    logger.warning("Windows, can't run time.tzset()")
+
+
+@dataclass
+class GenerationConfig:
+    temperature: float = 0.7
+    top_k: int = 50
+    top_p: float = 0.9
+    repetition_penalty: float = 1.0
+    max_new_tokens: int = 512
+    seed: int = 42
+    reset: bool = False
+    stream: bool = True
+    # threads: int = cpu_count
+    # stop: list[str] = field(default_factory=lambda: [stop_string])
+
+
+def generate(
+    question: str,
+    llm=LLM,
+    config: GenerationConfig = GenerationConfig(),
+):
+    """Run model inference; returns a generator when config.stream is True."""
+    prompt = prompt_template.format(question=question)
+
+    return llm(
+        prompt,
+        **asdict(config),
+    )
+
+
+logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+def user(user_message, history):
+    # append the user's turn, leaving the bot reply to be filled in
+    history.append([user_message, None])
+    return user_message, history
+
+
+def user1(user_message, history):
+    # same as user(), but clears the message box
+    history.append([user_message, None])
+    return "", history
+
+
+def bot_(history):
+    # test double for bot(): streams a canned reply; not wired into the UI
+    user_message = history[-1][0]
+    resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+    bot_message = user_message + ": " + resp
+    history[-1][1] = ""
+    for character in bot_message:
+        history[-1][1] += character
+        time.sleep(0.02)
+        yield history
+
+    history[-1][1] = resp
+    yield history
+
+
+def bot(history):
+    user_message = history[-1][0]
+    response = []
+
+    logger.debug(f"{user_message=}")
+
+    with about_time() as atime:
+        flag = 1
+        prefix = ""
+        then = time.time()
+
+        logger.debug("about to generate")
+
+        config = GenerationConfig(reset=True)
+        for elm in generate(user_message, config=config):
+            if flag == 1:
+                logger.debug("in the loop")
+                prefix = f"({time.time() - then:.2f}s) "
+                flag = 0
+                print(prefix, end="", flush=True)
+                logger.debug(f"{prefix=}")
+            print(elm, end="", flush=True)
+
+            response.append(elm)
+            history[-1][1] = prefix + "".join(response)
+            yield history
+
+    _ = (
+        f"(time elapsed: {atime.duration_human}, "
+        f"{atime.duration/len(''.join(response)):.2f}s/char)"
+    )
+
+    history[-1][1] = "".join(response) + f"\n{_}"
+    yield history
+
+
+
+def predict_api(prompt):
+    logger.debug(f"{prompt=}")
+    try:
+        # user_prompt = prompt
+        config = GenerationConfig(
+            temperature=0.2,
+            top_k=10,
+            top_p=0.9,
+            repetition_penalty=1.0,
+            max_new_tokens=512,  # adjust as needed
+            seed=42,
+            reset=True,
+            stream=False,
+        )
+
+        response = generate(
+            prompt,
+            config=config,
+        )
+
+        logger.debug(f"api: {response=}")
+    except Exception as exc:
+        logger.error(exc)
+        response = f"{exc=}"
+    return response
+
+
+css = """
+    .importantButton {
+        background: linear-gradient(45deg, #7e0570, #5d1c99, #6e00ff) !important;
+        border: none !important;
+    }
+    .importantButton:hover {
+        background: linear-gradient(45deg, #ff00e0, #8500ff, #6e00ff) !important;
+        border: none !important;
+    }
+    .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+    .xsmall {font-size: x-small;}
+"""
+etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+examples_list = [
+    ["Send an email requesting that people use language models responsibly."],
+    ["Write a shouting match between Julius Caesar and Napoleon."],
+    ["Write a theory to explain why cats never existed."],
+    ["Write a story about a grain of sand as it watches millions of years go by."],
+    ["What are 3 popular chess openings?"],
+    ["Write a conversation between the sun and Pluto."],
+    ["Did you know that Yann LeCun dropped a rap album last year? We listened to it and here's what we thought:"],
+]
+
+logger.info("start block")
+
+with gr.Blocks(
+    title=f"{Path(model_loc).name}",
+    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+    css=css,
+) as block:
+    # buff_var = gr.State("")
+    with gr.Accordion("🎈 Info", open=False):
+        # gr.HTML(
+        #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+        # )
+        gr.Markdown(
+            f"""<h5><center>{Path(model_loc).name}</center></h5>
+            Most examples are meant for another model;
+            you may want to try related prompts of your own.""",
+            elem_classes="xsmall",
+        )
+
+    # chatbot = gr.Chatbot().style(height=700)  # 500
+    chatbot = gr.Chatbot(height=500)
+
+    # buff = gr.Textbox(show_label=False, visible=True)
+
+    with gr.Row():
+        with gr.Column(scale=5):
+            msg = gr.Textbox(
+                label="Chat Message Box",
+                placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                show_label=False,
+                # container=False,
+                lines=6,
+                max_lines=30,
+                show_copy_button=True,
+                # ).style(container=False)
+            )
+        with gr.Column(scale=1, min_width=50):
+            with gr.Row():
+                submit = gr.Button("Submit", elem_classes="xsmall")
+                stop = gr.Button("Stop", visible=True)
+                clear = gr.Button("Clear History", visible=True)
+    with gr.Row(visible=False):
+        with gr.Accordion("Advanced Options:", open=False):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    system = gr.Textbox(
+                        label="System Prompt",
+                        value=prompt_template,
+                        show_label=False,
+                        container=False,
+                        # ).style(container=False)
+                    )
+                with gr.Column():
+                    with gr.Row():
+                        change = gr.Button("Change System Prompt")
+                        reset = gr.Button("Reset System Prompt")
+
+    with gr.Accordion("Example Inputs", open=True):
+        examples = gr.Examples(
+            examples=examples_list,
+            inputs=[msg],
+            examples_per_page=40,
+        )
+
+    # with gr.Row():
+    with gr.Accordion("Disclaimer", open=False):
+        _ = Path(model_loc).name
+        gr.Markdown(
+            f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+            f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+            "biased, or otherwise offensive outputs.",
+            elem_classes=["disclaimer"],
+        )
+
+    msg_submit_event = msg.submit(
+        # fn=conversation.user_turn,
+        fn=user,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=True,
+        show_progress="full",
+        # api_name=None,
+    ).then(bot, chatbot, chatbot, queue=True)
+    submit_click_event = submit.click(
+        # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+        fn=user1,  # clear msg
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=True,
+        # queue=False,
+        show_progress="full",
+        # api_name=None,
+    ).then(bot, chatbot, chatbot, queue=True)
+    stop.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[msg_submit_event, submit_click_event],
+        queue=False,
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+        input_text = gr.Text()
+        api_btn = gr.Button("Go", variant="primary")
+        out_text = gr.Text()
+
+    api_btn.click(
+        predict_api,
+        input_text,
+        out_text,
+        api_name="api",
+    )
+
+    # block.load(update_buff, [], buff, every=1)
+    # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+# concurrency_count=5, max_size=20
+# max_size=36, concurrency_count=14
+# CPU cpu_count=2 16G, model 7G
+# CPU UPGRADE cpu_count=8 32G, model 7G
+
+# does not work
+_ = """
+# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+# concurrency_count = max(_, 1)
+if psutil.cpu_count(logical=False) >= 8:
+    # concurrency_count = max(int(32 / file_size) - 1, 1)
+else:
+    # concurrency_count = max(int(16 / file_size) - 1, 1)
+# """
+
+concurrency_count = 1
+logger.info(f"{concurrency_count=}")
+
+block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
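
Because `predict_api` is wired to `api_btn.click` with `api_name="api"`, the running Space can also be queried programmatically. A minimal sketch, assuming the `gradio_client` package and this duplicate's Space id (swap in your own):

```python
# Minimal sketch of calling the /api endpoint defined above.
# Assumes: pip install gradio_client; the Space is up and running.
from gradio_client import Client

client = Client("lavanjv/GOAT-7B-COMMUNITY-CHAT")  # or your own duplicate
answer = client.predict("What are 3 popular chess openings?", api_name="/api")
print(answer)
```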
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ctransformers  # ==0.2.10 0.2.13
+transformers  # ==4.30.2
+# huggingface_hub
+gradio
+loguru
+about-time
+psutil
+dl-hf-model
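
The same files run outside Spaces. A minimal local smoke test mirroring app.py's setup, assuming the requirements above are installed; `dl_hf_model` downloads the ~4 GB GGML file on first use:

```python
# Local smoke test mirroring app.py's model setup.
# Assumes: pip install -r requirements.txt (CPU-only; ~4 GB download).
from ctransformers import AutoModelForCausalLM
from dl_hf_model import dl_hf_model

URL = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"

model_loc, _file_size = dl_hf_model(URL)
llm = AutoModelForCausalLM.from_pretrained(model_loc, model_type="llama")

prompt = "### HUMAN:\nWhat are 3 popular chess openings?\n\n### RESPONSE:"
print(llm(prompt, max_new_tokens=128, temperature=0.7))
```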