pg56714 committed on
Commit 4afc829 · verified · 1 parent: a79ac72

Update app.py

Files changed (1)
1. app.py  +50 −34
app.py CHANGED
@@ -15,20 +15,41 @@ MARKDOWN = """
 # YOLO-World + EfficientViT-SAM
 Powered by Roboflow [Inference](https://github.com/roboflow/inference) and [Supervision](https://github.com/roboflow/supervision) and [YOLO-World](https://github.com/AILab-CVC/YOLO-World) and [EfficientViT-SAM](https://github.com/mit-han-lab/efficientvit)
 """
+IMAGE_EXAMPLES = [
+    [
+        os.path.join(os.path.dirname(__file__), "images/livingroom.jpg"),
+        "table, lamp, dog, sofa, plant, clock, carpet, frame on the wall",
+        0.05,
+        0.5,
+        True,
+        # True,
+        True,
+    ],
+    [
+        os.path.join(os.path.dirname(__file__), "images/cat_and_dogs.jpg"),
+        "cat, dog",
+        0.2,
+        0.5,
+        True,
+        # True,
+        True,
+    ],
+]
+
 
 # Load models
-yolo_world = YOLOWorld(model_id="yolo_world/l")
+YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/l")
 
 # Inference: the confidence score values in the new version of YOLO-World are abnormal due to a bug
 # old versions are not supported ======================
-# yolo_world = YOLOWorld(model_id="yolo_world/s")
-# yolo_world = YOLOWorld(model_id="yolo_world/m")
-# yolo_world = YOLOWorld(model_id="yolo_world/x")
-
-# yolo_world = YOLOWorld(model_id="yolo_world/v2-s")
-# yolo_world = YOLOWorld(model_id="yolo_world/v2-m")
-# yolo_world = YOLOWorld(model_id="yolo_world/v2-l")
-# yolo_world = YOLOWorld(model_id="yolo_world/v2-x")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/s")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/m")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/x")
+
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/v2-s")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/v2-m")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/v2-l")
+# YOLO_WORLD_MODEL = YOLOWorld(model_id="yolo_world/v2-x")
 # =====================================================
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -51,7 +72,7 @@ def annotate_image(
     input_image: np.ndarray,
     detections: sv.Detections,
     categories: List[str],
-    with_confidence: bool = False,
+    with_confidence: bool = True,
 ) -> np.ndarray:
     labels = [
         (
@@ -68,20 +89,23 @@ def annotate_image(
 
 
 def process_image(
-    image: np.ndarray,
+    input_image: np.ndarray,
     categories: str,
     confidence_threshold: float,
     nms_threshold: float,
     with_confidence: bool = True,
+    # with_class_agnostic_nms: bool = True,
     with_segmentation: bool = True,
 ) -> np.ndarray:
+    global exclude_positions
+
     # Preparation.
     categories = process_categories(categories)
-    yolo_world.set_classes(categories)
+    YOLO_WORLD_MODEL.set_classes(categories)
     # print("categories:", categories)
 
     # Object detection
-    results = yolo_world.infer(image, confidence=confidence_threshold)
+    results = YOLO_WORLD_MODEL.infer(input_image, confidence=confidence_threshold)
     detections = sv.Detections.from_inference(results).with_nms(
         class_agnostic=True, threshold=nms_threshold
     )
@@ -89,7 +113,7 @@ def process_image(
 
     # Segmentation
     if with_segmentation:
-        sam.set_image(image, image_format="RGB")
+        sam.set_image(input_image, image_format="RGB")
         masks = []
         for xyxy in detections.xyxy:
             mask, _, _ = sam.predict(box=xyxy, multimask_output=False)
@@ -98,7 +122,7 @@ def process_image(
     # print("masks shaped as", detections.mask.shape)
 
     # Annotation
-    output_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    output_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
     output_image = annotate_image(
         input_image=output_image,
         detections=detections,
@@ -144,13 +168,18 @@ with_confidence_component = gr.Checkbox(
     info=("Whether to display the confidence of the detected objects."),
 )
 
+# with_class_agnostic_nms_component = gr.Checkbox(
+#     value=True,
+#     label="Use Class-Agnostic NMS",
+#     info=("Suppress overlapping detections across different classes."),
+# )
+
 with_segmentation_component = gr.Checkbox(
     value=True,
     label="With Segmentation",
     info=("Whether to run EfficientViT-SAM for instance segmentation."),
 )
 
-
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
     with gr.Row():
@@ -169,33 +198,19 @@ with gr.Blocks() as demo:
         iou_threshold_component.render()
         with gr.Row():
             with_confidence_component.render()
+            # with_class_agnostic_nms_component.render()
             with_segmentation_component.render()
+
     gr.Examples(
         # fn=process_image,
-        examples=[
-            [
-                os.path.join(os.path.dirname(__file__), "examples/livingroom.jpg"),
-                "table, lamp, dog, sofa, plant, clock, carpet, frame on the wall",
-                0.05,
-                0.5,
-                True,
-                True,
-            ],
-            [
-                os.path.join(os.path.dirname(__file__), "examples/cat_and_dogs.jpg"),
-                "cat, dog",
-                0.2,
-                0.5,
-                True,
-                True,
-            ],
-        ],
+        examples=IMAGE_EXAMPLES,
         inputs=[
            input_image_component,
            image_categories_text_component,
            confidence_threshold_component,
            iou_threshold_component,
            with_confidence_component,
+           # with_class_agnostic_nms_component,
            with_segmentation_component,
         ],
         outputs=yolo_world_output_image_component,
@@ -209,6 +224,7 @@ with gr.Blocks() as demo:
            confidence_threshold_component,
            iou_threshold_component,
            with_confidence_component,
+           # with_class_agnostic_nms_component,
            with_segmentation_component,
         ],
         outputs=yolo_world_output_image_component,
 
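For context, here is a minimal sketch of the flow `process_image` implements after this commit: load YOLO-World through Roboflow Inference, run open-vocabulary detection with class-agnostic NMS, and annotate with Supervision. This is an illustration, not the app itself: the image path and thresholds are borrowed from `IMAGE_EXAMPLES`, the annotator classes are an assumption about the Supervision version in use, and the EfficientViT-SAM step appears only as comments because the `sam` predictor is constructed elsewhere in `app.py`.

```python
# Sketch of the detect -> NMS -> annotate pipeline from the diff above.
import cv2
import supervision as sv
from inference.models import YOLOWorld

# Same checkpoint the commit renames to YOLO_WORLD_MODEL.
model = YOLOWorld(model_id="yolo_world/l")

# Open-vocabulary classes, as process_categories() would produce them.
model.set_classes(["cat", "dog"])

# Example image and thresholds borrowed from IMAGE_EXAMPLES.
image = cv2.imread("images/cat_and_dogs.jpg")

# Detection, then class-agnostic NMS, mirroring process_image().
results = model.infer(image, confidence=0.2)
detections = sv.Detections.from_inference(results).with_nms(
    class_agnostic=True, threshold=0.5
)

# The segmentation branch of process_image() feeds each box to
# EfficientViT-SAM (the predictor is built elsewhere in app.py):
#   sam.set_image(image, image_format="RGB")
#   mask, _, _ = sam.predict(box=xyxy, multimask_output=False)

# Draw boxes and labels with Supervision annotators.
annotated = sv.BoundingBoxAnnotator().annotate(scene=image.copy(), detections=detections)
annotated = sv.LabelAnnotator().annotate(scene=annotated, detections=detections)
cv2.imwrite("annotated.jpg", annotated)
```

Note that `cv2.imread` returns `None` for a missing path, so substitute any local image when trying the sketch.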