Update app.py
app.py CHANGED
@@ -15,20 +15,41 @@ MARKDOWN = """
|
|
15 |
# YOLO-World + EfficientViT-SAM
|
16 |
Powered by Roboflow [Inference](https://github.com/roboflow/inference) and [Supervision](https://github.com/roboflow/supervision) and [YOLO-World](https://github.com/AILab-CVC/YOLO-World) and [EfficientViT-SAM](https://github.com/mit-han-lab/efficientvit)
|
17 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Load models
|
20 |
-
|
21 |
|
22 |
# interence:The confidence score values in the new version of YOLO-World are abnormal due to a bug
|
23 |
# old version not support=============================
|
24 |
-
#
|
25 |
-
#
|
26 |
-
#
|
27 |
-
|
28 |
-
#
|
29 |
-
#
|
30 |
-
#
|
31 |
-
#
|
32 |
# =====================================================
|
33 |
|
34 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
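Note: the commit pins the large variant and keeps the other model ids as commented-out alternatives. A minimal sketch of making the variant configurable instead of editing those comments by hand, assuming a hypothetical `YOLO_WORLD_MODEL_ID` environment variable and the usual import path from the Roboflow `inference` package:

```python
import os

# Import path assumed from the Roboflow `inference` package docs.
from inference.models.yolo_world import YOLOWorld

# Hypothetical: pick one of the ids listed in the diff via an environment variable.
MODEL_ID = os.getenv("YOLO_WORLD_MODEL_ID", "yolo_world/l")
YOLO_WORLD_MODEL = YOLOWorld(model_id=MODEL_ID)
```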
@@ -51,7 +72,7 @@ def annotate_image(
    input_image: np.ndarray,
    detections: sv.Detections,
    categories: List[str],
-    with_confidence: bool =
+    with_confidence: bool = True,
) -> np.ndarray:
    labels = [
        (
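Only the `with_confidence` default changes here; the rest of `annotate_image` sits outside the hunk. For reference, a hedged sketch of the kind of body this signature implies, assuming supervision's standard annotators (the actual implementation in app.py may differ):

```python
import numpy as np
import supervision as sv
from typing import List

# Assumed annotators; app.py defines its own instances elsewhere.
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator()
MASK_ANNOTATOR = sv.MaskAnnotator()
LABEL_ANNOTATOR = sv.LabelAnnotator()

def annotate_image(
    input_image: np.ndarray,
    detections: sv.Detections,
    categories: List[str],
    with_confidence: bool = True,
) -> np.ndarray:
    # One label per detection; append the score only when requested.
    labels = [
        (
            f"{categories[class_id]}: {confidence:.3f}"
            if with_confidence
            else f"{categories[class_id]}"
        )
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]
    output_image = MASK_ANNOTATOR.annotate(input_image, detections)
    output_image = BOUNDING_BOX_ANNOTATOR.annotate(output_image, detections)
    output_image = LABEL_ANNOTATOR.annotate(output_image, detections, labels=labels)
    return output_image
```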
@@ -68,20 +89,23 @@ def annotate_image(


def process_image(
-
+    input_image: np.ndarray,
    categories: str,
    confidence_threshold: float,
    nms_threshold: float,
    with_confidence: bool = True,
+    # with_class_agnostic_nms: bool = True,
    with_segmentation: bool = True,
) -> np.ndarray:
+    global exclude_positions
+
    # Preparation.
    categories = process_categories(categories)
-
+    YOLO_WORLD_MODEL.set_classes(categories)
    # print("categories:", categories)

    # Object detection
-    results =
+    results = YOLO_WORLD_MODEL.infer(input_image, confidence=confidence_threshold)
    detections = sv.Detections.from_inference(results).with_nms(
        class_agnostic=True, threshold=nms_threshold
    )
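The new detection path is: prompt the open-vocabulary model with the parsed categories, run inference at the requested confidence, then apply class-agnostic NMS via supervision (the added `global exclude_positions` refers to state defined elsewhere in app.py and is not shown in this hunk). A standalone sketch of that path, with the import path and example image assumed:

```python
import cv2
import supervision as sv
from inference.models.yolo_world import YOLOWorld  # import path assumed

# Same calls as the hunk above, run outside Gradio.
model = YOLOWorld(model_id="yolo_world/l")
image = cv2.imread("images/livingroom.jpg")        # example path from IMAGE_EXAMPLES

classes = ["table", "lamp", "dog", "sofa"]
model.set_classes(classes)                          # prompt the open-vocabulary head
results = model.infer(image, confidence=0.05)       # same call as in process_image

detections = sv.Detections.from_inference(results).with_nms(
    class_agnostic=True, threshold=0.5              # class-agnostic NMS, as in the diff
)
print(len(detections), detections.confidence)
```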
@@ -89,7 +113,7 @@ def process_image(

    # Segmentation
    if with_segmentation:
-        sam.set_image(
+        sam.set_image(input_image, image_format="RGB")
        masks = []
        for xyxy in detections.xyxy:
            mask, _, _ = sam.predict(box=xyxy, multimask_output=False)
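`sam.set_image` now receives the RGB input once, and each YOLO-World box is then refined into a mask. A sketch of the full loop, including the mask collection that falls outside the hunk (the squeeze and the `detections.mask` assignment are assumptions about the surrounding code):

```python
import numpy as np
import supervision as sv

def segment_detections(sam, image_rgb: np.ndarray, detections: sv.Detections) -> sv.Detections:
    """One EfficientViT-SAM prediction per YOLO-World box.

    Assumes `sam` is the predictor already created in app.py and that `predict`
    returns masks shaped (1, H, W) when multimask_output=False.
    """
    sam.set_image(image_rgb, image_format="RGB")
    masks = []
    for xyxy in detections.xyxy:
        mask, _, _ = sam.predict(box=xyxy, multimask_output=False)
        masks.append(np.squeeze(mask))      # drop the leading singleton axis (assumption)
    detections.mask = np.array(masks)       # supervision expects (N, H, W) boolean masks
    return detections
```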
@@ -98,7 +122,7 @@ def process_image(
    # print("masks shaped as", detections.mask.shape)

    # Annotation
-    output_image = cv2.cvtColor(
+    output_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    output_image = annotate_image(
        input_image=output_image,
        detections=detections,
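The previously truncated `cv2.cvtColor` call now converts the Gradio-supplied RGB array to BGR before drawing. A sketch of the intended round trip, reusing `annotate_image` from app.py and assuming the function converts back to RGB before returning the image to Gradio (that part is outside the hunk):

```python
import cv2
import numpy as np
import supervision as sv
from typing import List

def annotate_for_gradio(
    input_image: np.ndarray,          # RGB array as delivered by gr.Image
    detections: sv.Detections,
    categories: List[str],
    with_confidence: bool = True,
) -> np.ndarray:
    # Flip to BGR so OpenCV-style drawing uses its native channel order.
    output_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
    output_image = annotate_image(
        input_image=output_image,
        detections=detections,
        categories=categories,
        with_confidence=with_confidence,
    )
    # Back to RGB for the Gradio output component (assumed to mirror what
    # app.py does after this hunk).
    return cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
```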
@@ -144,13 +168,18 @@ with_confidence_component = gr.Checkbox(
    info=("Whether to display the confidence of the detected objects."),
)

+# with_class_agnostic_nms_component = gr.Checkbox(
+#     value=True,
+#     label="Use Class-Agnostic NMS",
+#     info=("Suppress overlapping detections across different classes."),
+# )
+
with_segmentation_component = gr.Checkbox(
    value=True,
    label="With Segmentation",
    info=("Whether to run EfficientViT-SAM for instance segmentation."),
)

-
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
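The new checkbox block for class-agnostic NMS is committed commented out. If it were enabled, the flag would presumably flow into the `with_nms` call in `process_image`; a hypothetical sketch of that wiring (not part of this commit):

```python
import gradio as gr
import supervision as sv

# Hypothetical: the checkbox added (commented out) above, plus where its value would go.
with_class_agnostic_nms_component = gr.Checkbox(
    value=True,
    label="Use Class-Agnostic NMS",
    info="Suppress overlapping detections across different classes.",
)

def apply_nms(results, nms_threshold: float, with_class_agnostic_nms: bool) -> sv.Detections:
    # Forward the flag instead of hard-coding class_agnostic=True as the diff currently does.
    return sv.Detections.from_inference(results).with_nms(
        class_agnostic=with_class_agnostic_nms, threshold=nms_threshold
    )
```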
@@ -169,33 +198,19 @@ with gr.Blocks() as demo:
        iou_threshold_component.render()
    with gr.Row():
        with_confidence_component.render()
+        # with_class_agnostic_nms_component.render()
        with_segmentation_component.render()
+
    gr.Examples(
        # fn=process_image,
-        examples=
-        [
-            os.path.join(os.path.dirname(__file__), "examples/livingroom.jpg"),
-            "table, lamp, dog, sofa, plant, clock, carpet, frame on the wall",
-            0.05,
-            0.5,
-            True,
-            True,
-        ],
-        [
-            os.path.join(os.path.dirname(__file__), "examples/cat_and_dogs.jpg"),
-            "cat, dog",
-            0.2,
-            0.5,
-            True,
-            True,
-        ],
-        ],
+        examples=IMAGE_EXAMPLES,
        inputs=[
            input_image_component,
            image_categories_text_component,
            confidence_threshold_component,
            iou_threshold_component,
            with_confidence_component,
+            # with_class_agnostic_nms_component,
            with_segmentation_component,
        ],
        outputs=yolo_world_output_image_component,
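`gr.Examples` now reuses the module-level `IMAGE_EXAMPLES` rather than an inline list, and each row is positional: it must line up one-to-one with the `inputs` list (the commented `# True,` entries keep the rows in step with the commented-out class-agnostic NMS input). A sketch of how one row feeds `process_image`, with the image-loading step an assumption about how Gradio delivers the file:

```python
import cv2

# Second example row: [image path, categories, confidence, IoU threshold,
# with_confidence, with_segmentation] -- same order as the `inputs` list above.
image_path, categories, confidence, iou, with_conf, with_seg = IMAGE_EXAMPLES[1]

result = process_image(
    input_image=cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB),  # assumes the UI hands in RGB arrays
    categories=categories,
    confidence_threshold=confidence,
    nms_threshold=iou,
    with_confidence=with_conf,
    with_segmentation=with_seg,
)
```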
@@ -209,6 +224,7 @@ with gr.Blocks() as demo:
            confidence_threshold_component,
            iou_threshold_component,
            with_confidence_component,
+            # with_class_agnostic_nms_component,
            with_segmentation_component,
        ],
        outputs=yolo_world_output_image_component,