Spaces:

sandz7
/

chimera

Runtime error

App Files Files Community

sandz7 committed on Jun 7, 2024

Commit

a423985

1 Parent(s): 5613b72

added async

Browse files

Files changed (1) hide show

app.py +33 -65

app.py CHANGED Viewed

@@ -18,18 +18,17 @@ DESCRIPTION = '''
 <p>This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a> and a Multimodal from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
 </div>
 '''
 # Llava Installed
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     "xtuner/llava-llama-3-8b-v1_1-transformers",
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
-    )
 llava_model.to("cuda:0")
 processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
-llava_model.generation_config.eos_token_id=128009
 # Stable Diffusor Installed
 base = DiffusionPipeline.from_pretrained(
@@ -50,67 +49,44 @@ refiner = DiffusionPipeline.from_pretrained(
 )
 refiner.to('cuda')
-# All Installed. Let's instance them in the function
 def multimodal_and_generation(message, history):
-    """
-    Receives input from gradio from the prompt but also
-    if any images were passed that i also placed for formatting
-    for PIL and with the prompt both are passed to proper generation,
-    depending on the request from prompt, that prompt output will return here.
-    """
     print(f"Message:\n{message}\nType:\n{type(message)}")
     image_path = None
     if message["files"]:
-        if type(message["files"][-1]) == dict:
             image_path = message["files"][-1]["path"]
         else:
             image_path = message["files"][-1]
     else:
-        # If no image was uploaded than look for past ones
         for hist in history:
-            if type(hist[0]) == tuple:
-                image_path = hist[0][0] # item inside items for history
     if image_path is None:
         input_prompt = message["text"]
-        # base_prompt = '''gpt response: {input_prompt}'''
-        # prompt_formatted = base_prompt.format(input_prompt=input_prompt)
-        # GPT Generation
         client = OpenAI(api_key=API_KEY)
         stream = client.chat.completions.create(
             model="gpt-3.5-turbo",
-            messages=[{"role": "system", "content": "You are a helpful assistant called 'chimera'."},
-                      {"role": "user", "content": input_prompt}],
             stream=True,
         )
         return stream
     else:
-        prompt = f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{message['text']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
-        # Time to instance the llava
         image = Image.open(image_path)
         inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
-        streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": False, "skip_prompt": True})
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
         thread = threading.Thread(target=llava_model.generate, kwargs=generation_kwargs)
         thread.start()
-        # buffer = ""
-        # for new_text in streamer:
-        #     # find <|eot_id|> and remove it from the new_text
-        #     if "<|eot_id|>" in new_text:
-        #         new_text = new_text.split("<|eot_id|>")[0]
-        #     buffer += new_text
-        #     generated_text_no_prompt = buffer
-        #     yield generated_text_no_prompt
         return streamer
 def diffusing(prompt):
-    """
-    Uses stable diffusion on the prompt and
-    returns the image.
-    """
     image = base(
         prompt=prompt,
         num_inference_steps=40,
@@ -135,62 +111,54 @@ def check_cuda_availability():
 mode = ""
 @spaces.GPU(duration=120)
-def bot_comms(message,
-              history):
-    """
-    Communication between gradio and the models.
-    """
     global mode
     if message == "check cuda":
         result = check_cuda_availability()
         yield result
         return
     if message == "imagery":
         mode = message
         yield "Imagery On! Type your prompt to make the image 🖼️"
         return
     if message == "chatting":
         mode = message
         yield "Imagery Off. Ask me any questions. ☄️"
         return
     if mode == "imagery":
         print("On imagery\n\n")
         image = diffusing(
-            message=message,
-            history=history,
         )
-        return image
-    buffer = ""
-    gpt_outputs = []
     if mode == "chatting" or mode == "":
         print("On chatting or no mode.\n\n")
         stream = multimodal_and_generation(
             message=message,
             history=history,
         )
-        streamer_text = [text for text in stream]
-        if "<|eot_id|>" not in streamer_text:
             for chunk in stream:
                 if chunk.choices[0].delta.content is not None:
                     text = chunk.choices[0].delta.content
                     gpt_outputs.append(text)
                     yield "".join(gpt_outputs)
-        else:
-            for text in stream:
-                # find <|eot_id|> and remove it from the text
-                if "<|eot_id|>" in text:
-                    text = text.split("<|eot_id|>")[0]
-                buffer += text
-                generated_text = buffer
-                yield generated_text
-chatbot=gr.Chatbot(height=600, label="Chimera AI")
 chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
@@ -203,4 +171,4 @@ with gr.Blocks(fill_height=True) as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 <p>This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a> and a Multimodal from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
 </div>
 '''
 # Llava Installed
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     "xtuner/llava-llama-3-8b-v1_1-transformers",
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
+)
 llava_model.to("cuda:0")
 processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+llava_model.generation_config.eos_token_id = 128009
 # Stable Diffusor Installed
 base = DiffusionPipeline.from_pretrained(
 )
 refiner.to('cuda')
 def multimodal_and_generation(message, history):
     """
     Pick an image (if any) for the request and start a streamed generation.

     Args:
         message: mapping indexed with "text" and "files"; the last entry of
             "files" is either a dict holding a "path" key or the path itself.
         history: iterable of past turns; a turn whose first item is a tuple
             is taken to carry an image path in that tuple's first slot.

     Returns:
         The OpenAI streaming response when no image is available, otherwise
         the TextIteratorStreamer fed by a background LLaVA generate() thread.
     """
     print(f"Message:\n{message}\nType:\n{type(message)}")
     image_path = None
     if message["files"]:
         # Only the most recently attached file is used.
         if isinstance(message["files"][-1], dict):
             image_path = message["files"][-1]["path"]
         else:
             image_path = message["files"][-1]
     else:
         # No upload in this turn: fall back to the latest image in history
         # (later matches overwrite earlier ones).
         for hist in history:
             if isinstance(hist[0], tuple):
                 image_path = hist[0][0]
     if image_path is None:
         # Text-only request: answered by the OpenAI chat model.
         input_prompt = message["text"]
         client = OpenAI(api_key=API_KEY)
         stream = client.chat.completions.create(
             model="gpt-3.5-turbo",
             messages=[
                 {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                 {"role": "user", "content": input_prompt},
             ],
             stream=True,
         )
         return stream
     # Image request: the Llama-3 chat-template markers must be present in the
     # prompt; without them the model receives an untemplated prompt.
     prompt = (
         "<|start_header_id|>user<|end_header_id|>\n\n"
         f"<image>\n{message['text']}<|eot_id|>"
         "<|start_header_id|>assistant<|end_header_id|>\n\n"
     )
     image = Image.open(image_path)
     inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
     # Special tokens are kept in the stream; the consumer strips <|eot_id|>.
     streamer = TextIteratorStreamer(processor.tokenizer, skip_special_tokens=False, skip_prompt=True)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
     # generate() blocks until done, so it runs in a worker thread while the
     # caller iterates the streamer.
     thread = threading.Thread(target=llava_model.generate, kwargs=generation_kwargs)
     thread.start()
     return streamer
 def diffusing(prompt):
     image = base(
         prompt=prompt,
         num_inference_steps=40,
 mode = ""
 @spaces.GPU(duration=120)
 async def bot_comms(message, history):
     """
     Dispatch one chat turn: mode commands, image generation, or chatting.

     Args:
         message: the incoming turn; either a plain string or a mapping with
             a "text" entry (the form multimodal_and_generation indexes).
         history: previous turns, forwarded to multimodal_and_generation.

     Yields:
         A status string for the "check cuda" / "imagery" / "chatting"
         commands, the generated image in imagery mode, or the progressively
         accumulated reply text while chatting.
     """
     global mode
     # Commands are typed as text. When the message is a mapping, comparing it
     # directly to a string can never match, so compare its text instead.
     text = message["text"] if isinstance(message, dict) else message
     if text == "check cuda":
         result = check_cuda_availability()
         yield result
         return
     if text == "imagery":
         mode = text
         yield "Imagery On! Type your prompt to make the image 🖼️"
         return
     if text == "chatting":
         mode = text
         yield "Imagery Off. Ask me any questions. ☄️"
         return
     if mode == "imagery":
         print("On imagery\n\n")
         image = diffusing(
             prompt=text,
         )
         yield image
         return
     if mode == "chatting" or mode == "":
         print("On chatting or no mode.\n\n")
         stream = multimodal_and_generation(
             message=message,
             history=history,
         )
         # NOTE(review): both streams are iterated synchronously inside this
         # async generator, which presumably blocks the event loop between
         # chunks; confirm this is acceptable for the deployment.
         if isinstance(stream, TextIteratorStreamer):
             # LLaVA path: drop the end-of-turn marker and anything after it.
             # (An empty separator here would make str.split raise ValueError.)
             buffer = ""
             for new_text in stream:
                 buffer += new_text.partition("<|eot_id|>")[0]
                 yield buffer
         else:
             # OpenAI path: accumulate the delta chunks into the running reply.
             gpt_outputs = []
             for chunk in stream:
                 if chunk.choices[0].delta.content is not None:
                     gpt_outputs.append(chunk.choices[0].delta.content)
                     yield "".join(gpt_outputs)
+chatbot = gr.Chatbot(height=600, label="Chimera AI")
 chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)
 with gr.Blocks(fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
     )
 if __name__ == "__main__":
+    demo.launch()