Grounding

Build error

App Files Files Community

shouryap committed on Jan 3

Commit

49cc110

1 Parent(s): 12991b1

File

Browse files

Files changed (2) hide show

app.py +111 -52
gitattributes +35 -0

app.py CHANGED Viewed

@@ -48,7 +48,7 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
 def image_transform_grounding(init_image):
     transform = T.Compose([
-        # T.Resize((800, 833)),
         T.ToTensor(),
         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
@@ -56,10 +56,10 @@ def image_transform_grounding(init_image):
     return init_image, image
 def image_transform_grounding_for_vis(init_image):
-    # transform = T.Compose([
-    #     T.Resize((800, 833)),
-    # ])
-    # image, _ = transform(init_image, None) # 3, h, w
     return init_image
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
@@ -73,59 +73,118 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     # run grounding
     boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
-    annotated_frame, detections = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
-    return image_with_box,detections
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args = parser.parse_args()
-    css = """
-  #mkd {
     height: 500px;
     overflow: auto;
     border: 1px solid #ccc;
-  }
 """
-    block = gr.Blocks(css=css).queue()
-    with block:
-        gr.Markdown("<h1><center>Grounding DINO<h1><center>")
-        gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
-        gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
-        with gr.Row():
-            with gr.Column():
-                input_image = gr.Image(source='upload', type="pil")
-                grounding_caption = gr.Textbox(label="Detection Prompt")
-                run_button = gr.Button(label="Run")
-                with gr.Accordion("Advanced options", open=False):
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-            with gr.Column():
-                gallery = gr.outputs.Image(
-                    type="pil",
-                    # label="grounding results"
-                ).style(full_width=True, full_height=True)
-                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
-                #         grid=[1], height="auto", container=True, full_width=True, full_height=True)
-        run_button.click(fn=run_grounding, inputs=[
-                        input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
-        gr.Examples(
-          [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
-          inputs = [input_image, grounding_caption, box_threshold, text_threshold],
-          outputs = [gallery],
-          fn=run_grounding,
-          cache_examples=True,
-          label='Try this example input!'
-      )
-    block.launch(share=False, show_api=False, show_error=True)

 def image_transform_grounding(init_image):
     transform = T.Compose([
+        # T.RandomResize([800], max_size=1333),
         T.ToTensor(),
         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
     return init_image, image
 def image_transform_grounding_for_vis(init_image):
+    transform = T.Compose([
+        T.RandomResize([800], max_size=1333),
+    ])
+    image, _ = transform(init_image, None) # 3, h, w
     return init_image
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
     # run grounding
     boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+    annotated_frame,detects = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
+    return image_with_box,detects
 if __name__ == "__main__":
+#     parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
+#     parser.add_argument("--debug", action="store_true", help="using debug mode")
+#     parser.add_argument("--share", action="store_true", help="share the app")
+#     args = parser.parse_args()
+#     css = """
+#   #mkd {
+#     height: 500px;
+#     overflow: auto;
+#     border: 1px solid #ccc;
+#   }
+# """
+#     block = gr.Blocks(css=css).queue()
+#     with block:
+#         gr.Markdown("<h1><center>Grounding DINO<h1><center>")
+#         gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
+#         gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
+#         with gr.Row():
+#             with gr.Column():
+#                 input_image = gr.Image(source='upload', type="pil")
+#                 grounding_caption = gr.Textbox(label="Detection Prompt")
+#                 run_button = gr.Button(label="Run")
+#                 with gr.Accordion("Advanced options", open=False):
+#                     box_threshold = gr.Slider(
+#                         label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+#                     )
+#                     text_threshold = gr.Slider(
+#                         label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+#                     )
+#             with gr.Column():
+#                 gallery = gr.outputs.Image(
+#                     type="pil",
+#                     # label="grounding results"
+#                 ).style(full_width=True, full_height=True)
+#                 # gallery = gr.Gallery(label="Generated images", show_label=False).style(
+#                 #         grid=[1], height="auto", container=True, full_width=True, full_height=True)
+#         run_button.click(fn=run_grounding, inputs=[
+#                         input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
+#         gr.Examples(
+#           [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
+#           inputs = [input_image, grounding_caption, box_threshold, text_threshold],
+#           outputs = [gallery],
+#           fn=run_grounding,
+#           cache_examples=True,
+#           label='Try this example input!'
+#       )
+#     block.launch(share=False, show_api=False, show_error=True)
+parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
+parser.add_argument("--debug", action="store_true", help="using debug mode")
+parser.add_argument("--share", action="store_true", help="share the app")
+args = parser.parse_args()
+css = """
+#mkd {
     height: 500px;
     overflow: auto;
     border: 1px solid #ccc;
+}
 """
+block = gr.Blocks(css=css).queue()
+with block:
+    gr.Markdown("<h1><center>Grounding DINO<h1><center>")
+    gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
+    gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(source='upload', type="pil")
+            grounding_caption = gr.Textbox(label="Detection Prompt")
+            run_button = gr.Button(label="Run")
+            with gr.Accordion("Advanced options", open=False):
+                box_threshold = gr.Slider(
+                    label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+                )
+                text_threshold = gr.Slider(
+                    label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+                )
+        with gr.Column():
+            gallery = gr.outputs.Image(
+                type="pil",
+                # label="grounding results"
+            ).style(full_width=True, full_height=True)
+            detects_output = gr.Textbox(
+                label="Detected Phrases", interactive=False, visible=True
+            )
+    run_button.click(fn=run_grounding, inputs=[
+        input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery, detects_output])
+    gr.Examples(
+        [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
+        inputs=[input_image, grounding_caption, box_threshold, text_threshold],
+        outputs=[gallery, detects_output],
+        fn=run_grounding,
+        cache_examples=True,
+        label='Try this example input!'
+    )
+block.launch(share=False, show_api=False, show_error=True)

gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+this_is_fine.png filter=lfs diff=lfs merge=lfs -text