Csplk committed on
Commit
eb99994
·
verified ·
1 Parent(s): b150481

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import argparse
3
+ import torch
4
+ import re
5
+ import gradio as gr
6
+ from threading import Thread
7
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
8
+ from PIL import Image
9
+
10
# Kept from the original script; no arguments are registered or parsed here.
parser = argparse.ArgumentParser()

# Hub repository and the revision tag passed to both from_pretrained calls,
# so the tokenizer and the model weights come from the same snapshot.
model_id = "vikhyat/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

# trust_remote_code=True allows transformers to execute the modelling code
# shipped inside the model repository. nn.Module.eval() puts the module in
# evaluation mode and returns the module itself, so it can be chained.
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    revision=revision,
    trust_remote_code=True,
    torch_dtype=torch.float32,
).eval()
20
+
21
@spaces.GPU(duration=10)
def answer_question(images, prompts):
    """Answer prompts about one or more images with the moondream model.

    Args:
        images: A single PIL image, or a list/tuple of PIL images. A bare
            image (what a ``gr.Image`` component delivers) is wrapped in a
            one-element list. ``None`` or an empty sequence yields ``[]``.
        prompts: A single prompt string, or a sequence of prompt strings.
            A bare string (what a ``gr.Textbox`` delivers) is repeated once
            per image instead of being treated as a sequence of characters.

    Returns:
        A list with the answers produced by ``moondream.batch_answer``;
        an empty list when no image was supplied.
    """
    # Nothing uploaded yet: avoid calling the model with no input.
    if images is None:
        return []
    # A single image object is not iterable; normalise to a list.
    if not isinstance(images, (list, tuple)):
        images = [images]
    if not images:
        return []
    # One textual prompt applies to every image in the batch.
    if isinstance(prompts, str):
        prompts = [prompts] * len(images)

    image_embeds = torch.cat(
        [moondream.encode_image(img) for img in images], dim=0
    )
    # NOTE(review): the call shape is kept exactly as originally written.
    # Confirm against the pinned model revision whether batch_answer expects
    # pre-computed embeddings (as passed here) or the raw images.
    answers = moondream.batch_answer(
        images=image_embeds,
        prompts=prompts,
        tokenizer=tokenizer,
    )
    return list(answers)
31
+
32
# Build the Gradio UI: a prompt box with a submit button on the first row,
# an image input and the response box on the second row.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🌔 moondream2
        A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)
        """
    )
    with gr.Row():
        prompts = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
        submit = gr.Button("Submit")
    with gr.Row():
        # `multiple=True` was dropped from both components below: it is not a
        # documented parameter of gr.Image or gr.Textbox, and constructors
        # that take no extra keyword arguments reject it at start-up.
        images = gr.Image(type="pil", label="Upload Images")
        output = gr.Textbox(label="Response")
    # Clicking the button and pressing Enter in the prompt box both run the
    # same handler with the same inputs and output.
    submit.click(answer_question, [images, prompts], output)
    prompts.submit(answer_question, [images, prompts], output)

# Enable request queuing, then start the server.
demo.queue().launch()