moondream-webui / app.py
LukeJacob2023's picture
Update app.py
9328f5f verified
raw
history blame
2.5 kB
import spaces
import torch
import re
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from PIL import Image
# Use half precision on CUDA when it is available; otherwise fall back to the
# CPU with full precision.
device, dtype = (
    ("cuda", torch.float16) if torch.cuda.is_available() else ("cpu", torch.float32)
)

# Model repository and the revision that both the tokenizer and the model are
# loaded from.
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=dtype,
)
moondream = moondream.to(device=device)
# The app only runs inference, so switch the module to evaluation mode.
moondream.eval()
@spaces.GPU
def answer_questions(image_tuples, prompt_text):
    """Ask every prompt about every uploaded image and collect the answers.

    Args:
        image_tuples: Value of the image gallery. Each item carries the image
            as its first element (``item[0]``); presumably these are
            ``(image, caption)`` pairs. ``None`` (nothing uploaded) and items
            whose image is ``None`` are skipped.
        prompt_text: Comma-separated prompts. Surrounding whitespace is
            stripped and blank entries are ignored.

    Returns:
        A ``(markdown, table)`` pair. ``markdown`` lists, for each prompt, the
        answer given for each image. ``table`` is a dict with ``headers`` (the
        prompts) and ``data`` (one row per image, one column per prompt).

    Raises:
        gr.Error: If no non-empty prompt or no usable image was supplied.
    """
    prompts = [p.strip() for p in (prompt_text or "").split(",")]
    prompts = [p for p in prompts if p]
    if not prompts:
        raise gr.Error("Please enter at least one prompt.")

    # Keep each image's gallery position for its label so that skipping an
    # empty slot never shifts answers onto the wrong image.
    labels = []
    images = []
    for position, item in enumerate(image_tuples or [], start=1):
        picture = item[0]
        if picture is None:
            continue
        labels.append(f"image{position}")
        # Convert once here rather than once per prompt.
        images.append(picture.convert("RGB"))
    if not images:
        raise gr.Error("Please upload at least one image.")

    # answers[i][j] is the answer to prompts[i] for images[j].
    answers = [
        moondream.batch_answer(
            images=images,
            prompts=[prompt] * len(images),
            tokenizer=tokenizer,
        )
        for prompt in prompts
    ]

    sections = []
    for prompt, prompt_answers in zip(prompts, answers):
        sections.append(f"### Q: {prompt}\n")
        sections.extend(
            f"**{label} A:** \n {answer_text} \n\n"
            for label, answer_text in zip(labels, prompt_answers)
        )
    Q_and_A = "".join(sections)

    # The table's columns are the prompts, so each row must hold one image's
    # answers across all prompts: transpose the per-prompt answer lists.
    rows = [list(per_image) for per_image in zip(*answers)]
    result = {"headers": prompts, "data": rows}
    return Q_and_A, result
# Page layout: title and credits, image gallery, prompt box, submit button,
# then the two outputs produced by answer_questions.
with gr.Blocks() as demo:
    gr.Markdown("# MoonDream WebUI")
    gr.Markdown("## 🌔 WebUI is modified from https://huggingface.co/spaces/Csplk/moondream2-batch-processing")
    gr.Markdown("## 🌔 moondream2 - A tiny vision language model. [GitHub](https://github.com/vikhyatk/moondream)")
    with gr.Row():
        img = gr.Gallery(label="Upload Images", type="pil", preview=True, columns=4)
    with gr.Row():
        # The handler asks every prompt about every image, so the hint must
        # not suggest a one-to-one pairing of prompts and images.
        prompt = gr.Textbox(
            label="Input Prompts",
            placeholder="Enter prompts separated by commas; every prompt is asked about every image. Ex: Describe this image, What is in this image?",
            lines=8,
        )
    with gr.Row():
        submit = gr.Button("Submit")
    with gr.Row():
        output = gr.Markdown(label="Questions and Answers", line_breaks=True)
    with gr.Row():
        output2 = gr.Dataframe(label="Structured Dataframe", type="array", wrap=True)
    # Gallery and prompt text go in; the markdown summary and the table come out.
    submit.click(answer_questions, [img, prompt], [output, output2])

demo.queue().launch()