lavanjv and s3nh committed
Commit
e2313cc
0 Parent(s):

Duplicate from s3nh/GOAT-7B-COMMUNITY-CHAT


Co-authored-by: s3nh <[email protected]>

Files changed (4)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +396 -0
  4. requirements.txt +8 -0
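
Note: this commit only mirrors the upstream Space. For reference, the same duplication can be scripted; a minimal sketch, assuming `huggingface_hub` is installed and an HF token with write access is configured (neither is part of this commit):

```python
# Hypothetical one-off script; not part of this commit.
# Requires: pip install huggingface_hub, plus an HF token with write access.
from huggingface_hub import duplicate_space

# Creates <your-username>/GOAT-7B-COMMUNITY-CHAT from the upstream Space.
repo_url = duplicate_space("s3nh/GOAT-7B-COMMUNITY-CHAT")
print(repo_url)
```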
.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
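
The patterns above route large binary artifacts (model weights, archives, serialized tensors) through Git LFS so the repository itself stays small. A rough sketch of checking a path against these globs, assuming only the standard library and an abridged pattern list (`fnmatch` approximates, but does not exactly replicate, gitattributes matching such as `saved_model/**/*`):

```python
# Rough check of whether a filename would be routed through Git LFS.
# PATTERNS is abridged from the .gitattributes above.
from fnmatch import fnmatch

PATTERNS = ["*.bin", "*.safetensors", "*.gz", "*.zip"]  # abridged

def tracked_by_lfs(path: str) -> bool:
    return any(fnmatch(path, pattern) for pattern in PATTERNS)

print(tracked_by_lfs("GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"))  # True: *.bin
```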
README.md ADDED
@@ -0,0 +1,14 @@
+---
+title: GOAT 7B COMMUNITY CHAT
+emoji: 🚀
+colorFrom: green
+colorTo: gray
+sdk: gradio
+sdk_version: 3.38.0
+app_file: app.py
+pinned: false
+license: openrail
+duplicated_from: s3nh/GOAT-7B-COMMUNITY-CHAT
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,396 @@
+
+import os
+import platform
+import random
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+import gradio as gr
+import psutil
+from about_time import about_time
+from ctransformers import AutoModelForCausalLM
+from dl_hf_model import dl_hf_model
+from loguru import logger
+
+
+URL = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"  # 4.05G
+
+_ = (
+    "golay" in platform.node()
+    or "okteto" in platform.node()
+    or Path("/kaggle").exists()
+    # or psutil.cpu_count(logical=False) < 4
+    or 1  # run 7b in hf
+)
+
+if _:
+    url = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"  # 4.05G, same file as URL
+
+
+prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction: {user_prompt}
+
+### Response:
+"""
+
+prompt_template = """System: You are a helpful,
+respectful and honest assistant. Always answer as
+helpfully as possible, while being safe. Your answers
+should not include any harmful, unethical, racist,
+sexist, toxic, dangerous, or illegal content. Please
+ensure that your responses are socially unbiased and
+positive in nature. If a question does not make any
+sense, or is not factually coherent, explain why instead
+of answering something not correct. If you don't know
+the answer to a question, please don't share false
+information.
+User: {prompt}
+Assistant: """
+
+prompt_template = """System: You are a helpful assistant.
+User: {prompt}
+Assistant: """
+
+prompt_template = """Question: {question}
+Answer: Let's work this out in a step by step way to be sure we have the right answer."""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Think step by step.
+<</SYS>>
+
+What NFL team won the Super Bowl in the year Justin Bieber was born?
+[/INST]"""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful assistant. Always answer as helpfully as possible. Think step by step. <</SYS>>
+
+{question} [/INST]
+"""
+
+prompt_template = """[INST] <<SYS>>
+You are a helpful assistant.
+<</SYS>>
+
+{question} [/INST]
+"""
+
+# the last assignment wins: this is the template actually used below
+prompt_template = """### HUMAN:
+{question}
+
+### RESPONSE:"""
+
+_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+
+logger.debug(f"{stop_string=} not used")
+
+_ = psutil.cpu_count(logical=False) - 1
+cpu_count: int = int(_) if _ else 1
+logger.debug(f"{cpu_count=}")
+
+LLM = None
+
+try:
+    model_loc, file_size = dl_hf_model(url)
+except Exception as exc_:
+    logger.error(exc_)
+    raise SystemExit(1) from exc_
+
+LLM = AutoModelForCausalLM.from_pretrained(
+    model_loc,
+    model_type="llama",
+)
+
+logger.info(f"done load llm {model_loc=} {file_size=}G")
+
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()
+except Exception:
+    # time.tzset() is POSIX-only; warn instead of crashing on Windows
+    logger.warning("Windows, can't run time.tzset()")
+
+
+@dataclass
+class GenerationConfig:
+    temperature: float = 0.7
+    top_k: int = 50
+    top_p: float = 0.9
+    repetition_penalty: float = 1.0
+    max_new_tokens: int = 512
+    seed: int = 42
+    reset: bool = False
+    stream: bool = True
+    # threads: int = cpu_count
+    # stop: list[str] = field(default_factory=lambda: [stop_string])
+
+
+def generate(
+    question: str,
+    llm=LLM,
+    config: GenerationConfig = GenerationConfig(),
+):
+    """Run model inference; returns a generator when config.stream is True."""
+    prompt = prompt_template.format(question=question)
+
+    return llm(
+        prompt,
+        **asdict(config),
+    )
+
+
+logger.debug(f"{asdict(GenerationConfig())=}")
+
+
+def user(user_message, history):
+    # append the user's turn, leaving the bot reply to be filled in
+    history.append([user_message, None])
+    return user_message, history
+
+
+def user1(user_message, history):
+    # same as user(), but clears the message box
+    history.append([user_message, None])
+    return "", history
+
+
+def bot_(history):
+    # test double for bot(): streams a canned reply; not wired into the UI
+    user_message = history[-1][0]
+    resp = random.choice(["How are you?", "I love you", "I'm very hungry"])
+    bot_message = user_message + ": " + resp
+    history[-1][1] = ""
+    for character in bot_message:
+        history[-1][1] += character
+        time.sleep(0.02)
+        yield history
+
+    history[-1][1] = resp
+    yield history
+
+
+def bot(history):
+    user_message = history[-1][0]
+    response = []
+
+    logger.debug(f"{user_message=}")
+
+    with about_time() as atime:
+        flag = 1
+        prefix = ""
+        then = time.time()
+
+        logger.debug("about to generate")
+
+        config = GenerationConfig(reset=True)
+        for elm in generate(user_message, config=config):
+            if flag == 1:
+                logger.debug("in the loop")
+                prefix = f"({time.time() - then:.2f}s) "
+                flag = 0
+                print(prefix, end="", flush=True)
+                logger.debug(f"{prefix=}")
+            print(elm, end="", flush=True)
+
+            response.append(elm)
+            history[-1][1] = prefix + "".join(response)
+            yield history
+
+    _ = (
+        f"(time elapsed: {atime.duration_human}, "
+        f"{atime.duration/len(''.join(response)):.2f}s/char)"
+    )
+
+    history[-1][1] = "".join(response) + f"\n{_}"
+    yield history
+
+
+
+def predict_api(prompt):
+    logger.debug(f"{prompt=}")
+    try:
+        # user_prompt = prompt
+        config = GenerationConfig(
+            temperature=0.2,
+            top_k=10,
+            top_p=0.9,
+            repetition_penalty=1.0,
+            max_new_tokens=512,  # adjust as needed
+            seed=42,
+            reset=True,
+            stream=False,
+        )
+
+        response = generate(
+            prompt,
+            config=config,
+        )
+
+        logger.debug(f"api: {response=}")
+    except Exception as exc:
+        logger.error(exc)
+        response = f"{exc=}"
+    return response
+
+
+css = """
+    .importantButton {
+        background: linear-gradient(45deg, #7e0570, #5d1c99, #6e00ff) !important;
+        border: none !important;
+    }
+    .importantButton:hover {
+        background: linear-gradient(45deg, #ff00e0, #8500ff, #6e00ff) !important;
+        border: none !important;
+    }
+    .disclaimer {font-variant-caps: all-small-caps; font-size: xx-small;}
+    .xsmall {font-size: x-small;}
+"""
+etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
+examples_list = [
+    ["Send an email requesting that people use language models responsibly."],
+    ["Write a shouting match between Julius Caesar and Napoleon."],
+    ["Write a theory to explain why cats never existed."],
+    ["Write a story about a grain of sand as it watches millions of years go by."],
+    ["What are 3 popular chess openings?"],
+    ["Write a conversation between the sun and Pluto."],
+    ["Did you know that Yann LeCun dropped a rap album last year? We listened to it and here's what we thought:"],
+]
+
+logger.info("start block")
+
+with gr.Blocks(
+    title=f"{Path(model_loc).name}",
+    theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
+    css=css,
+) as block:
+    # buff_var = gr.State("")
+    with gr.Accordion("🎈 Info", open=False):
+        # gr.HTML(
+        #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+        # )
+        gr.Markdown(
+            f"""<h5><center>{Path(model_loc).name}</center></h5>
+            Most examples are meant for another model;
+            you may want to try related prompts of your own.""",
+            elem_classes="xsmall",
+        )
+
+    # chatbot = gr.Chatbot().style(height=700)  # 500
+    chatbot = gr.Chatbot(height=500)
+
+    # buff = gr.Textbox(show_label=False, visible=True)
+
+    with gr.Row():
+        with gr.Column(scale=5):
+            msg = gr.Textbox(
+                label="Chat Message Box",
+                placeholder="Ask me anything (press Shift+Enter or click Submit to send)",
+                show_label=False,
+                # container=False,
+                lines=6,
+                max_lines=30,
+                show_copy_button=True,
+                # ).style(container=False)
+            )
+        with gr.Column(scale=1, min_width=50):
+            with gr.Row():
+                submit = gr.Button("Submit", elem_classes="xsmall")
+                stop = gr.Button("Stop", visible=True)
+                clear = gr.Button("Clear History", visible=True)
+    with gr.Row(visible=False):
+        with gr.Accordion("Advanced Options:", open=False):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    system = gr.Textbox(
+                        label="System Prompt",
+                        value=prompt_template,
+                        show_label=False,
+                        container=False,
+                        # ).style(container=False)
+                    )
+                with gr.Column():
+                    with gr.Row():
+                        change = gr.Button("Change System Prompt")
+                        reset = gr.Button("Reset System Prompt")
+
+    with gr.Accordion("Example Inputs", open=True):
+        examples = gr.Examples(
+            examples=examples_list,
+            inputs=[msg],
+            examples_per_page=40,
+        )
+
+    # with gr.Row():
+    with gr.Accordion("Disclaimer", open=False):
+        _ = Path(model_loc).name
+        gr.Markdown(
+            f"Disclaimer: {_} can produce factually incorrect output, and should not be relied on to produce "
+            f"factually accurate information. {_} was trained on various public datasets; while great efforts "
+            "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
+            "biased, or otherwise offensive outputs.",
+            elem_classes=["disclaimer"],
+        )
+
+    msg_submit_event = msg.submit(
+        # fn=conversation.user_turn,
+        fn=user,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=True,
+        show_progress="full",
+        # api_name=None,
+    ).then(bot, chatbot, chatbot, queue=True)
+    submit_click_event = submit.click(
+        # fn=lambda x, y: ("",) + user(x, y)[1:],  # clear msg
+        fn=user1,  # clear msg
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=True,
+        # queue=False,
+        show_progress="full",
+        # api_name=None,
+    ).then(bot, chatbot, chatbot, queue=True)
+    stop.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[msg_submit_event, submit_click_event],
+        queue=False,
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+        input_text = gr.Text()
+        api_btn = gr.Button("Go", variant="primary")
+        out_text = gr.Text()
+
+    api_btn.click(
+        predict_api,
+        input_text,
+        out_text,
+        api_name="api",
+    )
+
+    # block.load(update_buff, [], buff, every=1)
+    # block.load(update_buff, [buff_var], [buff_var, buff], every=1)
+
+# concurrency_count=5, max_size=20
+# max_size=36, concurrency_count=14
+# CPU cpu_count=2 16G, model 7G
+# CPU UPGRADE cpu_count=8 32G, model 7G
+
+# does not work
+_ = """
+# _ = int(psutil.virtual_memory().total / 10**9 // file_size - 1)
+# concurrency_count = max(_, 1)
+if psutil.cpu_count(logical=False) >= 8:
+    # concurrency_count = max(int(32 / file_size) - 1, 1)
+else:
+    # concurrency_count = max(int(16 / file_size) - 1, 1)
+# """
+
+concurrency_count = 1
+logger.info(f"{concurrency_count=}")
+
+block.queue(concurrency_count=concurrency_count, max_size=5).launch(debug=True)
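
Because `predict_api` is wired to `api_btn.click` with `api_name="api"`, the running Space can also be queried programmatically. A minimal sketch, assuming the `gradio_client` package and this duplicate's Space id (swap in your own):

```python
# Minimal sketch of calling the /api endpoint defined above.
# Assumes: pip install gradio_client; the Space is up and running.
from gradio_client import Client

client = Client("lavanjv/GOAT-7B-COMMUNITY-CHAT")  # or your own duplicate
answer = client.predict("What are 3 popular chess openings?", api_name="/api")
print(answer)
```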
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ctransformers  # ==0.2.10 0.2.13
+transformers  # ==4.30.2
+# huggingface_hub
+gradio
+loguru
+about-time
+psutil
+dl-hf-model
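
The same files run outside Spaces. A minimal local smoke test mirroring app.py's setup, assuming the requirements above are installed; `dl_hf_model` downloads the ~4 GB GGML file on first use:

```python
# Local smoke test mirroring app.py's model setup.
# Assumes: pip install -r requirements.txt (CPU-only; ~4 GB download).
from ctransformers import AutoModelForCausalLM
from dl_hf_model import dl_hf_model

URL = "https://huggingface.co/s3nh/GOAT-7B-Community-GGML/resolve/main/GOAT-7B-Community-GGML.ggmlv3.q4_0.bin"

model_loc, _file_size = dl_hf_model(URL)
llm = AutoModelForCausalLM.from_pretrained(model_loc, model_type="llama")

prompt = "### HUMAN:\nWhat are 3 popular chess openings?\n\n### RESPONSE:"
print(llm(prompt, max_new_tokens=128, temperature=0.7))
```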