AskMoondream / app.py
hanzla's picture
chat interface
2e92739
raw
history blame
2.49 kB
import os
import re
import subprocess
import sys
from threading import Thread

# NOTE: keep `spaces` ahead of `torch`, matching the original import order.
import spaces
import torch
import gradio as gr
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
# Install flash-attn for faster inference.
# `env=` REPLACES the child's environment instead of extending it, so the flag
# is merged into a copy of the current environment; otherwise pip would run
# without PATH, HOME, proxy or cache settings. The command is given as an
# argument list (no shell) and goes through the running interpreter so the
# package lands in the environment this script actually uses.
# The install stays best-effort (check=False), as in the original: a failure
# here surfaces later when the model is loaded with flash_attention_2.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Model and tokenizer setup
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"  # pin the model revision so remote code/weights stay fixed
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
# Inference only: switch the module to evaluation mode.
moondream.eval()
# Function to generate responses
@spaces.GPU(duration=10)
def answer_question(img, prompt):
    """Stream the model's answer to ``prompt`` about ``img``.

    The image is encoded with ``moondream.encode_image`` and
    ``moondream.answer_question`` is run on a background thread, feeding a
    ``TextIteratorStreamer`` that this generator consumes.

    Yields:
        The text accumulated so far, stripped of surrounding whitespace, once
        per chunk produced by the streamer.
    """
    embeds = moondream.encode_image(img)
    token_stream = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

    generation_kwargs = dict(
        image_embeds=embeds,
        question=prompt,
        tokenizer=tokenizer,
        streamer=token_stream,
    )
    # Generation runs on its own thread so the streamer can be drained here
    # while tokens are still being produced.
    worker = Thread(target=moondream.answer_question, kwargs=generation_kwargs)
    worker.start()

    text_so_far = ""
    for chunk in token_stream:
        text_so_far = text_so_far + chunk
        yield text_so_far.strip()
# Create the Gradio interface
with gr.Blocks(theme="Monochrome") as demo:
    gr.Markdown(
        """
# AskMoondream: Moondream 2 Demonstration Space
Moondream2 is a 1.86B parameter model initialized with weights from SigLIP and Phi 1.5.
Modularity AI presents this open source huggingface space for running fast experimental inferences on Moondream2.
"""
    )
    # Chatbot layout
    chatbot = gr.Chatbot()
    # Image upload and prompt input
    with gr.Row():
        img = gr.Image(type="pil", label="Upload an Image")
        prompt = gr.Textbox(label="Your Question", placeholder="Ask something about the image...", show_label=False)
    # Send message button
    send_btn = gr.Button("Send")

    # Function to send message and get response
    def send_message(history, prompt, image=None):
        """Append the user's question to the chat and stream the model's answer.

        Args:
            history: Current chatbot value, a list of (user, bot) pairs; may be
                empty or None.
            prompt: The question typed by the user.
            image: The uploaded image, delivered by Gradio as an event input.
                Reading ``img.value`` instead would only return the value the
                component was constructed with, never the user's upload.

        Yields:
            ``(history, "")`` pairs: the updated chat history and an empty
            string that clears the input box.

        Raises:
            gr.Error: If no image has been uploaded.
        """
        if image is None:
            raise gr.Error("Please upload an image before asking a question.")

        # Work on a copy so the incoming list is not mutated.
        history = list(history or [])
        history.append((prompt, ""))
        # Show the question immediately and clear the input box.
        yield history, ""

        # answer_question is a generator of progressively longer answers;
        # iterate it and overwrite the last turn with each partial answer.
        for partial_answer in answer_question(image, prompt):
            history[-1] = (prompt, partial_answer)
            yield history, ""

    # The image component is passed as an input so the handler receives the
    # image currently uploaded in the browser.
    send_btn.click(send_message, [chatbot, prompt, img], [chatbot, prompt])
    prompt.submit(send_message, [chatbot, prompt, img], [chatbot, prompt])

demo.queue().launch()