sandz7 committed
Commit f6da98c · 1 Parent(s): ed67eef

removed xtuner and kept the diffusion pipeline and GPT-3.5

Files changed (1)
  1. app.py +48 -84
app.py CHANGED
@@ -34,18 +34,18 @@ DESCRIPTION = '''
 # '''
 
 # Initialize the models
-llava_model = LlavaForConditionalGeneration.from_pretrained(
-    "xtuner/llava-llama-3-8b-v1_1-transformers",
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
-)
-llava_model = llava_model.half()
-llava_model.to("cuda:0")
+# llava_model = LlavaForConditionalGeneration.from_pretrained(
+#     "xtuner/llava-llama-3-8b-v1_1-transformers",
+#     torch_dtype=torch.float32,
+#     low_cpu_mem_usage=True,
+# )
+# llava_model = llava_model.half()
+# llava_model.to("cuda:0")
 
-processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
-tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# processor = AutoProcessor.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
+# tokenizer = AutoTokenizer.from_pretrained("xtuner/llava-llama-3-8b-v1_1-transformers")
 
-llava_model.generation_config.eos_token_id = 128009
+# llava_model.generation_config.eos_token_id = 128009
 
 # # Initialize Stable Diffusion pipelines
 # base = DiffusionPipeline.from_pretrained(
@@ -92,42 +92,42 @@ refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-ref
 
 chat_mode = {}
 
-def xtuner(message, history):
-    """
-    Generates text out of image input from llava.
-    """
-    image_path = None
-    if "files" in message and message["files"]:
-        if type(message["files"][-1]) == dict:
-            image_path = message["files"][-1]["path"]
-        else:
-            image_path = message["files"][-1]
-    else:
-        for hist in history:
-            if type(hist[0]) == tuple:
-                image_path = hist[0][0]
-
-    try:
-        if image_path is None:
-            gr.Error("Please upload an image so Chimera can understand it.")
-    except NameError:
-        gr.Error("Upload an Image so Chimera can understand it.")
-
-    # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
-    # image = Image.open(image_path)
-    with open(image_path, "rb") as image_file:
-        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
-
-    prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
-    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
-    streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
-
-    with torch.cuda.amp.autocast():
-        outputs = llava_model.generate(**generation_kwargs)
-
-    output_ids = outputs[0].cpu().numpy().tolist()
-    decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
+# def xtuner(message, history):
+#     """
+#     Generates text out of image input from llava.
+#     """
+#     image_path = None
+#     if "files" in message and message["files"]:
+#         if type(message["files"][-1]) == dict:
+#             image_path = message["files"][-1]["path"]
+#         else:
+#             image_path = message["files"][-1]
+#     else:
+#         for hist in history:
+#             if type(hist[0]) == tuple:
+#                 image_path = hist[0][0]
+
+#     try:
+#         if image_path is None:
+#             gr.Error("Please upload an image so Chimera can understand it.")
+#     except NameError:
+#         gr.Error("Upload an Image so Chimera can understand it.")
+
+#     # prompt = f"user\n\n<image>\n{message['text']}assistant\n\n"
+#     # image = Image.open(image_path)
+#     with open(image_path, "rb") as image_file:
+#         encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+#     prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
+#     inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=512)
+#     streamer = TextIteratorStreamer(processor.tokenizer, **{"skip_special_tokens": False, "skip_prompt": True})
+#     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024, do_sample=False)
+
+#     with torch.cuda.amp.autocast():
+#         outputs = llava_model.generate(**generation_kwargs)
+
+#     output_ids = outputs[0].cpu().numpy().tolist()
+#     decoded_output = tokenizer.decode(output_ids, skip_special_tokens=True)
 
     # prompt = f"user\n\n<image>\n{encoded_image}\n\n{message['text']}assistant\n\n"
     # inputs = tokenizer(prompt, image, return_tensors='pt', padding=True, truncation=True, max_length=512)
@@ -152,7 +152,7 @@ def xtuner(message, history):
     # yield generate_text
 
     # yield decoded_output
-    yield decoded_output
+    # yield decoded_output
 
 def generation(message, history):
     """
@@ -241,29 +241,6 @@ def bot_comms(message, history):
     if message["text"] == "check cuda":
         logger.debug("Checking CUDA availability.")
         return check_cuda_availability()
-
-    # if message["text"] == "imagery":
-    #     logger.debug("Switching to imagery mode.")
-    #     # mode_manager.set_mode("imagery")
-    #     mode += "imagery"
-    #     # logger.debug(f"\nimagery mode: {mode}\n")
-    #     return "Imagery On! Type your prompt to make the image 🖼️"
-
-    # if message["text"] == "chatting":
-    #     logger.debug("Switching to chatting mode.")
-    #     # mode_manager.set_mode("chatting")
-    #     mode += "chatting"
-    #     # logger.debug(f"\nchatting mode: {mode}\n")
-    #     return "Imagery Off. Ask me any questions. ☄️"
-
-    # if mode == "imagery":
-    #     logger.debug("Processing imagery prompt.")
-    #     if isinstance(message, dict) and "text" in message:
-    #         message = message["text"]
-    #     image = diffusing(message)
-    #     # mode_gradio("imagery")
-    #     image_created["Image"] = image
-    #     return image
 
     buffer = ""
     gpt_outputs = []
@@ -306,22 +283,9 @@ with gr.Blocks(fill_height=True) as demo:
     gr.Markdown('''
    <div>
    <h1 style="text-align: center;">Chimera Text Generation</h1>
-   <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo</p>
+   <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
    </div>
    ''')
    chat = gr.ChatInterface(fn=bot_comms)
-    # Xtuner
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown('''
-            <div>
-            <p style="text-align: center;">This has a LLava-Model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>Xtuner</b></a></p>
-            </div>
-            ''')
-            chat = gr.ChatInterface(
-                fn=xtuner,
-                multimodal=True,
-                textbox=chat_input,
-            )
 
 demo.launch()