import os
import re
import time
import subprocess
from threading import Thread

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

# Install flash-attn at runtime; the env flag skips the CUDA build check so the wheel installs on Spaces hardware.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
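
# Load the Moondream2 tokenizer and model (pinned to the 2024-04-02 revision) in bfloat16 on the GPU with FlashAttention-2.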
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()
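

# Log like/dislike feedback submitted through the chatbot UI.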
def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)
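

# Append any uploaded images and the typed text to the chat history, then disable
# the multimodal textbox until the bot has finished responding.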
def add_message(history, message):
    if message["files"]:
        for x in message["files"]:
            history.append(((x,), None))
    if message["text"] is not None:
        history.append((message["text"], None))
    return history, gr.MultimodalTextbox(value=None, interactive=False)
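

# Generate the assistant reply: scan the last five turns for the most recent image
# and text message, query Moondream2, and stream the answer into the chat.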
@spaces.GPU(duration=10)
def bot(history):
    last_five_messages = history[-5:]
    image_path = None
    last_message = None
    for message in last_five_messages:
        # Image uploads arrive as a tuple containing the file path; plain strings are text turns.
        if isinstance(message[0], tuple) and isinstance(message[0][0], str):
            image_path = message[0][0]
        if isinstance(message[0], str):
            last_message = message[0]

    if image_path:
        try:
            image = Image.open(image_path)
            image_embeds = moondream.encode_image(image)
            print(image_embeds.shape)
            response = moondream.answer_question(image_embeds, last_message, tokenizer)
        except IOError:
            response = "Failed to open image. Please check the image path or file permissions."
    else:
        # No image in the recent history: pass a zero image embedding so text-only questions still get an answer.
        image_embeds = torch.zeros(1, 729, 2048, dtype=torch.bfloat16, device='cuda')
        response = moondream.answer_question(image_embeds, last_message, tokenizer)

    # Stream the response character by character into the last chat turn.
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        yield history
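

# Build the Gradio UI: a chatbot with a multimodal textbox for text and image input.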
with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="emerald")) as demo:
    gr.Markdown(
        """
# AskMoondream: Moondream 2 Demonstration Space
## Modularity AI presents this open-source Hugging Face Space for running fast, experimental inference on Moondream2.
## Moondream2 is a 1.86B-parameter model initialized with weights from SigLIP and Phi-1.5.
"""
    )
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        height=450,
    )
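
    # On submit: add the user message to the history, stream the bot's reply, then re-enable the textbox.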
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )
    chat_msg = chat_input.submit(add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input])
    bot_msg = chat_msg.then(bot, inputs=chatbot, outputs=chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, outputs=[chat_input])

    chatbot.like(print_like_dislike, None, None)
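
# Queue requests so the generator-based bot can stream partial responses, then launch the app.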
demo.queue()
demo.launch()