shouryap committed on
Commit
49cc110
·
1 Parent(s): 12991b1
Files changed (2) hide show
  1. app.py +111 -52
  2. gitattributes +35 -0
app.py CHANGED
@@ -48,7 +48,7 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
48
 
49
  def image_transform_grounding(init_image):
50
  transform = T.Compose([
51
- # T.Resize((800, 833)),
52
  T.ToTensor(),
53
  T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
54
  ])
@@ -56,10 +56,10 @@ def image_transform_grounding(init_image):
56
  return init_image, image
57
 
58
  def image_transform_grounding_for_vis(init_image):
59
- # transform = T.Compose([
60
- # T.Resize((800, 833)),
61
- # ])
62
- # image, _ = transform(init_image, None) # 3, h, w
63
  return init_image
64
 
65
  model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
@@ -73,59 +73,118 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
73
 
74
  # run grounding
75
  boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
76
- annotated_frame, detections = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
77
  image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
78
- return image_with_box,detections
 
 
79
 
80
  if __name__ == "__main__":
81
 
82
- parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
83
- parser.add_argument("--debug", action="store_true", help="using debug mode")
84
- parser.add_argument("--share", action="store_true", help="share the app")
85
- args = parser.parse_args()
86
- css = """
87
- #mkd {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  height: 500px;
89
  overflow: auto;
90
  border: 1px solid #ccc;
91
- }
92
  """
93
- block = gr.Blocks(css=css).queue()
94
- with block:
95
- gr.Markdown("<h1><center>Grounding DINO<h1><center>")
96
- gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
97
- gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
98
-
99
- with gr.Row():
100
- with gr.Column():
101
- input_image = gr.Image(source='upload', type="pil")
102
- grounding_caption = gr.Textbox(label="Detection Prompt")
103
- run_button = gr.Button(label="Run")
104
- with gr.Accordion("Advanced options", open=False):
105
- box_threshold = gr.Slider(
106
- label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
107
- )
108
- text_threshold = gr.Slider(
109
- label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
110
- )
111
-
112
- with gr.Column():
113
- gallery = gr.outputs.Image(
114
- type="pil",
115
- # label="grounding results"
116
- ).style(full_width=True, full_height=True)
117
- # gallery = gr.Gallery(label="Generated images", show_label=False).style(
118
- # grid=[1], height="auto", container=True, full_width=True, full_height=True)
119
-
120
- run_button.click(fn=run_grounding, inputs=[
121
- input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
122
- gr.Examples(
123
- [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
124
- inputs = [input_image, grounding_caption, box_threshold, text_threshold],
125
- outputs = [gallery],
126
- fn=run_grounding,
127
- cache_examples=True,
128
- label='Try this example input!'
129
- )
130
- block.launch(share=False, show_api=False, show_error=True)
 
 
 
 
 
131
 
 
48
 
49
  def image_transform_grounding(init_image):
50
  transform = T.Compose([
51
+ # T.RandomResize([800], max_size=1333),
52
  T.ToTensor(),
53
  T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
54
  ])
 
56
  return init_image, image
57
 
58
  def image_transform_grounding_for_vis(init_image):
59
+ transform = T.Compose([
60
+ T.RandomResize([800], max_size=1333),
61
+ ])
62
+ image, _ = transform(init_image, None) # 3, h, w
63
  return init_image
64
 
65
  model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
 
73
 
74
  # run grounding
75
  boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
76
+ annotated_frame,detects = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
77
  image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
78
+
79
+
80
+ return image_with_box,detects
81
 
82
  if __name__ == "__main__":
83
 
84
+ # parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
85
+ # parser.add_argument("--debug", action="store_true", help="using debug mode")
86
+ # parser.add_argument("--share", action="store_true", help="share the app")
87
+ # args = parser.parse_args()
88
+ # css = """
89
+ # #mkd {
90
+ # height: 500px;
91
+ # overflow: auto;
92
+ # border: 1px solid #ccc;
93
+ # }
94
+ # """
95
+ # block = gr.Blocks(css=css).queue()
96
+ # with block:
97
+ # gr.Markdown("<h1><center>Grounding DINO<h1><center>")
98
+ # gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
99
+ # gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
100
+
101
+ # with gr.Row():
102
+ # with gr.Column():
103
+ # input_image = gr.Image(source='upload', type="pil")
104
+ # grounding_caption = gr.Textbox(label="Detection Prompt")
105
+ # run_button = gr.Button(label="Run")
106
+ # with gr.Accordion("Advanced options", open=False):
107
+ # box_threshold = gr.Slider(
108
+ # label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
109
+ # )
110
+ # text_threshold = gr.Slider(
111
+ # label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
112
+ # )
113
+
114
+ # with gr.Column():
115
+ # gallery = gr.outputs.Image(
116
+ # type="pil",
117
+ # # label="grounding results"
118
+ # ).style(full_width=True, full_height=True)
119
+ # # gallery = gr.Gallery(label="Generated images", show_label=False).style(
120
+ # # grid=[1], height="auto", container=True, full_width=True, full_height=True)
121
+
122
+ # run_button.click(fn=run_grounding, inputs=[
123
+ # input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
124
+ # gr.Examples(
125
+ # [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
126
+ # inputs = [input_image, grounding_caption, box_threshold, text_threshold],
127
+ # outputs = [gallery],
128
+ # fn=run_grounding,
129
+ # cache_examples=True,
130
+ # label='Try this example input!'
131
+ # )
132
+ # block.launch(share=False, show_api=False, show_error=True)
133
+
134
+
135
+ parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
136
+ parser.add_argument("--debug", action="store_true", help="using debug mode")
137
+ parser.add_argument("--share", action="store_true", help="share the app")
138
+ args = parser.parse_args()
139
+
140
+ css = """
141
+ #mkd {
142
  height: 500px;
143
  overflow: auto;
144
  border: 1px solid #ccc;
145
+ }
146
  """
147
+
148
+ block = gr.Blocks(css=css).queue()
149
+ with block:
150
+ gr.Markdown("<h1><center>Grounding DINO<h1><center>")
151
+ gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
152
+ gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
153
+
154
+ with gr.Row():
155
+ with gr.Column():
156
+ input_image = gr.Image(source='upload', type="pil")
157
+ grounding_caption = gr.Textbox(label="Detection Prompt")
158
+ run_button = gr.Button(label="Run")
159
+ with gr.Accordion("Advanced options", open=False):
160
+ box_threshold = gr.Slider(
161
+ label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
162
+ )
163
+ text_threshold = gr.Slider(
164
+ label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
165
+ )
166
+
167
+ with gr.Column():
168
+ gallery = gr.outputs.Image(
169
+ type="pil",
170
+ # label="grounding results"
171
+ ).style(full_width=True, full_height=True)
172
+ detects_output = gr.Textbox(
173
+ label="Detected Phrases", interactive=False, visible=True
174
+ )
175
+
176
+ run_button.click(fn=run_grounding, inputs=[
177
+ input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery, detects_output])
178
+
179
+ gr.Examples(
180
+ [["this_is_fine.png", "coffee cup", 0.25, 0.25]],
181
+ inputs=[input_image, grounding_caption, box_threshold, text_threshold],
182
+ outputs=[gallery, detects_output],
183
+ fn=run_grounding,
184
+ cache_examples=True,
185
+ label='Try this example input!'
186
+ )
187
+
188
+ block.launch(share=False, show_api=False, show_error=True)
189
+
190
 
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ this_is_fine.png filter=lfs diff=lfs merge=lfs -text