slush0 committed
Commit 3d3362f · 1 Parent(s): 68d1b54

Initial commit

Files changed (3)
  1. app.py +123 -0
  2. chat_client.py +78 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,123 @@
+ #!/usr/bin/env python
+ # or run with: gradio app.py
+
+ import gradio as gr
+
+ import chat_client
+
+ CHAT_URL = 'ws://chat.petals.ml/api/v2/generate'
+ # CHAT_URL = 'ws://localhost:8000/api/v2/generate'
+
+
+ def generate(prompt, model, endseq, max_length,
+              do_sample, top_k, top_p, temperature,
+              add_stoptoken, copy_output):
+
+     client = chat_client.ModelClient(CHAT_URL)
+     client.open_session(f"bigscience/{model}-petals", max_length)
+
+     if add_stoptoken:
+         prompt += "</s>" if "bloomz" in model else "\n\n"
+
+     # Translate checkbox items to the actual stop sequences
+     seq = []
+     for s in endseq:
+         if s == "\\n":
+             seq.append("\n")
+         elif s == "</s>":
+             seq.append("</s>")
+         elif s == "? (question mark)":
+             seq.append("?")
+         elif s == ". (dot)":
+             seq.append(".")
+
+     # Only one of top_k and top_p may be set
+     if top_k == 0:
+         top_k = None
+     if top_p == 0:
+         top_p = None
+     if top_p and top_k:
+         top_k = None
+
+     prompt2 = prompt
+     output = ''
+
+     # Render the prompt dialog immediately instead of waiting
+     # for the generator to return its first result
+     yield [prompt2, output]
+
+     for out in client.generate(prompt,
+                                max_new_tokens=1,
+                                do_sample=do_sample,
+                                temperature=temperature,
+                                top_k=top_k,
+                                top_p=top_p,
+                                extra_stop_sequences=seq):
+
+         output += out
+         if copy_output:
+             prompt2 += out
+
+         yield [prompt2, output]
+
+
+ with gr.Blocks() as iface:
+     gr.Markdown("""# Petals playground
+ **Let's play with prompts and inference settings for the BLOOM and BLOOMZ 176B models! This space uses the websocket API of [chat.petals.ml](https://chat.petals.ml).**
+
+ Do NOT talk to BLOOM as an entity; it's not a chatbot but a webpage/blog/article completion model.
+ For the best results: mimic a few sentences of a webpage similar to the content you want to generate.
+
+ BLOOMZ performs better in chat mode and understands instructions better.""")
+
+     with gr.Row():
+         model = gr.Radio(["bloom", "bloomz", "bloom-7b1"], value='bloom', label="Use model")
+
+         # Additional end sequences at which generation should stop
+         endseq = gr.CheckboxGroup(["\\n", "</s>", "? (question mark)", ". (dot)"],
+                                   value=["\\n", "</s>"], label='Extra end sequences')
+
+         # Maximum length of the inference session
+         max_length = gr.Radio([128, 256, 512, 1024, 2048], value=512, interactive=True, label="Max length")
+
+     with gr.Row():
+         with gr.Column():
+             # Switch between sampling and greedy generation
+             do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")
+
+             # Should the app append the stop sequence to the prompt,
+             # or leave the prompt open-ended?
+             add_stoptoken = gr.Checkbox(value=True, interactive=True, label="Automatically add stop token to prompt.")
+
+         # Only one of top_k and top_p can be set. Requires do_sample=True to work.
+         top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")
+         top_p = gr.Number(value=0.9, precision=2, interactive=True, label="top_p")
+
+         # Generation temperature
+         temperature = gr.Number(value=0.75, precision=2, interactive=True, label="Temperature")
+
+     prompt = gr.Textbox(lines=2, label='Prompt', placeholder="Prompt Here...")
+
+     with gr.Row():
+         button_generate = gr.Button("Generate")
+         button_stop = gr.Button("Stop")  # TODO: not supported by the websocket API yet.
+
+     # Automatically copy the output to the end of the prompt
+     copy_output = gr.Checkbox(label="Output -> Prompt")
+
+     output = gr.Textbox(lines=3, label='Output')
+
+     button_generate.click(generate, inputs=[prompt, model, endseq,
+                           max_length, do_sample, top_k, top_p, temperature,
+                           add_stoptoken, copy_output], outputs=[prompt, output])
+
+     examples = gr.Examples(inputs=[prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken],
+                            examples=[
+                                ["The SQL command to extract all the users whose name starts with A is: ", "bloom", False, 0, 0, 1, False],
+                                ["The Spanish translation of thank you for your help is: ", "bloom", False, 0, 0, 1, False],
+                                ["A human talks to a powerful AI that follows the human's instructions "
+                                 "and writes exhaustive, very detailed answers.</s>\n"
+                                 "Human: Hi!</s>\n"
+                                 "AI: Hi! How can I help you?</s>\n"
+                                 "Human: What's the capital of Portugal?</s>\n"
+                                 "AI: ", "bloomz", True, 0, 0.9, 0.75, False]
+                            ])
+
+ iface.queue()
+ iface.launch()
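As a rough illustration of how the streaming generate() above behaves outside the Gradio UI, a minimal driver might look like the sketch below. The prompt and parameter values are made up for the example, and a reachable Petals chat endpoint is assumed.

# Minimal sketch: consume the streaming generator outside the Gradio UI.
# Prompt and parameter values are illustrative only, not app defaults.
for prompt_so_far, output_so_far in generate(
        prompt="The Spanish translation of thank you is: ",
        model="bloom",
        endseq=["\\n"],      # translated to "\n" inside generate()
        max_length=128,
        do_sample=True,
        top_k=0,             # 0 means "unset" in this app
        top_p=0.9,
        temperature=0.75,
        add_stoptoken=True,
        copy_output=False):
    print(output_so_far)   # each yield carries the cumulative output so far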
chat_client.py ADDED
@@ -0,0 +1,78 @@
+ #!/usr/bin/env python
+ import json
+ import sys
+
+ # pip install websocket-client
+ import websocket
+
+
+ class ModelClient(object):
+     def __init__(self, endpoint_url):
+         self.endpoint_url = endpoint_url
+         self.ws = None
+         self.model = None
+
+     def open_session(self, model, max_length):
+         self.ws = websocket.create_connection(self.endpoint_url)
+         self.model = model
+         payload = {
+             "type": "open_inference_session",
+             "model": self.model,
+             "max_length": max_length,
+         }
+         self.ws.send(json.dumps(payload))
+         assert json.loads(self.ws.recv())['ok']
+
+     def close_session(self):
+         if self.ws:
+             self.ws.close()
+
+     def generate(self, prompt, **kwargs):
+         payload = {
+             "type": "generate",
+             "inputs": prompt,
+             "max_new_tokens": 1,
+             "do_sample": 0,
+             "temperature": 0,
+             "stop_sequence": "</s>" if "bloomz" in self.model else "\n\n",
+         }
+         payload = {**payload, **kwargs}
+         self.ws.send(json.dumps(payload))
+
+         while True:
+             try:
+                 data = json.loads(self.ws.recv())
+             except json.decoder.JSONDecodeError:
+                 self.close_session()
+                 raise
+
+             if not data['ok']:
+                 raise Exception(data['traceback'])
+             yield data['outputs']
+             if data['stop']:
+                 break
+
+
+ def main():
+     client = ModelClient("ws://localhost:8000/api/v2/generate")
+     # client = ModelClient("ws://chat.petals.ml/api/v2/generate")
+     client.open_session("bigscience/bloom-petals", 128)
+
+     if len(sys.argv) > 1:
+         prompt = sys.argv[1]
+         # The bloomz variant uses </s> instead of \n\n as its EOS token
+         if not prompt.endswith("\n\n"):
+             prompt += "\n\n"
+     else:
+         prompt = "The SQL command to extract all the users whose name starts with A is: \n\n"
+     print(f"Prompt: {prompt}")
+
+     # May raise petals.client.routing.sequence_manager.MissingBlocksError
+     for out in client.generate(prompt,
+                                do_sample=True,
+                                temperature=0.75,
+                                top_p=0.9):
+         print(out, end="", flush=True)
+
+     client.close_session()
+
+
+ if __name__ == '__main__':
+     main()
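For orientation, the wire protocol implied by ModelClient can be sketched as below. Field names are taken from the code above; the exact shape of the server's replies beyond the 'ok', 'outputs', 'stop', and 'traceback' keys the client reads is an assumption.

# Sketch of the websocket exchange implied by ModelClient above.
import json

# 1. Client -> server: open an inference session.
open_msg = json.dumps({
    "type": "open_inference_session",
    "model": "bigscience/bloom-petals",
    "max_length": 128,
})
# Server -> client: {"ok": true}

# 2. Client -> server: request generation (defaults as set by ModelClient).
generate_msg = json.dumps({
    "type": "generate",
    "inputs": "A cat sat on",
    "max_new_tokens": 1,
    "do_sample": 0,
    "temperature": 0,
    "stop_sequence": "\n\n",
})
# Server -> client: a stream of chunks such as
#   {"ok": true, "outputs": " a", "stop": false}
# until a chunk with "stop": true arrives (or "ok": false with a "traceback").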
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ websocket-client
+ gradio
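With just these two dependencies, the Space should run locally with `pip install -r requirements.txt` followed by `python app.py` (or `gradio app.py`, per the comment at the top of app.py), assuming the websocket endpoint in CHAT_URL is reachable.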