Spaces:

microsoft
/

OmniParser

Running on Zero

App Files Files Community

OmniParser / util /box_annotator.py

adamlu1

1st

0fc5095 18 days ago

raw

history blame

10.4 kB

	from typing import List, Optional, Union, Tuple

	import cv2
	import numpy as np

	from supervision.detection.core import Detections
	from supervision.draw.color import Color, ColorPalette


	class BoxAnnotator:
	    """
	    Draw bounding boxes, and optionally text labels, for a set of detections.

	    Attributes:
	        color (Union[Color, ColorPalette]): The color to draw the bounding box,
	            can be a single color or a color palette
	        thickness (int): The thickness of the bounding box lines, default is 3
	        text_color (Color): Stored for API compatibility, default is black.
	            `annotate` does not read it: the label text is drawn in black or
	            white depending on the luminance of the box color.
	        text_scale (float): The scale of the text on the bounding box,
	            default is 0.5
	        text_thickness (int): The thickness of the text on the bounding box,
	            default is 2
	        text_padding (int): The padding around the text on the bounding box,
	            default is 10
	        avoid_overlap (bool): If `True` (default), the label position is
	            chosen by `get_optimal_label_pos` so that it avoids other
	            detections and the image border where possible. If `False`, the
	            label is always placed above the top-left corner of the box.
	    """

	    def __init__(
	        self,
	        color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
	        thickness: int = 3,  # 1 for seeclick 2 for mind2web and 3 for demo
	        text_color: Color = Color.BLACK,
	        text_scale: float = 0.5,  # 0.8 for mobile/web, 0.3 for desktop # 0.4 for mind2web
	        text_thickness: int = 2,  # 1, # 2 for demo
	        text_padding: int = 10,
	        avoid_overlap: bool = True,
	    ):
	        self.color: Union[Color, ColorPalette] = color
	        self.thickness: int = thickness
	        self.text_color: Color = text_color
	        self.text_scale: float = text_scale
	        self.text_thickness: int = text_thickness
	        self.text_padding: int = text_padding
	        self.avoid_overlap: bool = avoid_overlap

	    def annotate(
	        self,
	        scene: np.ndarray,
	        detections: Detections,
	        labels: Optional[List[str]] = None,
	        skip_label: bool = False,
	        image_size: Optional[Tuple[int, int]] = None,
	    ) -> np.ndarray:
	        """
	        Draws bounding boxes on the frame using the detections provided.

	        The scene is modified in place and also returned.

	        Args:
	            scene (np.ndarray): The image on which the bounding boxes will be drawn
	            detections (Detections): The detections for which the
	                bounding boxes will be drawn
	            labels (Optional[List[str]]): An optional list of labels
	                corresponding to each detection. If `labels` are not provided
	                (or their count does not match the detections),
	                corresponding `class_id` will be used as label.
	            skip_label (bool): Is set to `True`, skips bounding box label annotation.
	            image_size (Optional[Tuple[int, int]]): `(width, height)` used to
	                keep labels inside the image when `avoid_overlap` is enabled.
	                If omitted, it is derived from `scene.shape`.
	        Returns:
	            np.ndarray: The image with the bounding boxes drawn on it

	        Example:
	            ```python
	            classes = ['person', ...]
	            image = ...
	            detections = sv.Detections(...)

	            box_annotator = BoxAnnotator()
	            labels = [
	                f"{classes[class_id]} {confidence:0.2f}"
	                for _, _, confidence, class_id, _ in detections
	            ]
	            annotated_frame = box_annotator.annotate(
	                scene=image.copy(),
	                detections=detections,
	                labels=labels
	            )
	            ```
	        """
	        font = cv2.FONT_HERSHEY_SIMPLEX

	        if image_size is None:
	            # The overlap check indexes image_size as (width, height); a numpy
	            # image is laid out as (rows, cols[, channels]).
	            image_size = (scene.shape[1], scene.shape[0])

	        # Custom labels are only used when there is exactly one per detection.
	        use_labels = labels is not None and len(detections) == len(labels)

	        for i in range(len(detections)):
	            # Plain Python ints keep the drawing calls independent of numpy
	            # scalar types.
	            x1, y1, x2, y2 = detections.xyxy[i].astype(int).tolist()
	            class_id = (
	                detections.class_id[i] if detections.class_id is not None else None
	            )
	            idx = class_id if class_id is not None else i
	            color = (
	                self.color.by_idx(idx)
	                if isinstance(self.color, ColorPalette)
	                else self.color
	            )
	            cv2.rectangle(
	                img=scene,
	                pt1=(x1, y1),
	                pt2=(x2, y2),
	                color=color.as_bgr(),
	                thickness=self.thickness,
	            )
	            if skip_label:
	                continue

	            text = labels[i] if use_labels else f"{class_id}"

	            text_width, text_height = cv2.getTextSize(
	                text=text,
	                fontFace=font,
	                fontScale=self.text_scale,
	                thickness=self.text_thickness,
	            )[0]

	            if not self.avoid_overlap:
	                # Fixed placement: label sits directly above the top-left corner.
	                text_x = x1 + self.text_padding
	                text_y = y1 - self.text_padding

	                text_background_x1 = x1
	                text_background_y1 = y1 - 2 * self.text_padding - text_height

	                text_background_x2 = x1 + 2 * self.text_padding + text_width
	                text_background_y2 = y1
	            else:
	                (
	                    text_x,
	                    text_y,
	                    text_background_x1,
	                    text_background_y1,
	                    text_background_x2,
	                    text_background_y2,
	                ) = get_optimal_label_pos(
	                    self.text_padding,
	                    text_width,
	                    text_height,
	                    x1,
	                    y1,
	                    x2,
	                    y2,
	                    detections,
	                    image_size,
	                )

	            cv2.rectangle(
	                img=scene,
	                pt1=(text_background_x1, text_background_y1),
	                pt2=(text_background_x2, text_background_y2),
	                color=color.as_bgr(),
	                thickness=cv2.FILLED,
	            )

	            # Choose black or white text from the perceived brightness of the
	            # label background so the label stays readable on any box color.
	            box_color = color.as_rgb()
	            luminance = 0.299 * box_color[0] + 0.587 * box_color[1] + 0.114 * box_color[2]
	            text_color = (0, 0, 0) if luminance > 160 else (255, 255, 255)
	            cv2.putText(
	                img=scene,
	                text=text,
	                org=(text_x, text_y),
	                fontFace=font,
	                fontScale=self.text_scale,
	                color=text_color,
	                thickness=self.text_thickness,
	                lineType=cv2.LINE_AA,
	            )
	        return scene


	def box_area(box):
	    """Return width times height of a box given as (x1, y1, x2, y2)."""
	    width = box[2] - box[0]
	    height = box[3] - box[1]
	    return width * height

	def intersection_area(box1, box2):
	    """Return the overlapping area of two (x1, y1, x2, y2) boxes.

	    Boxes that do not overlap (or only touch along an edge) give 0.
	    """
	    # Extent of the overlap along each axis; negative when the boxes are apart.
	    overlap_width = min(box1[2], box2[2]) - max(box1[0], box2[0])
	    overlap_height = min(box1[3], box2[3]) - max(box1[1], box2[1])
	    clipped_width = max(0, overlap_width)
	    clipped_height = max(0, overlap_height)
	    return clipped_width * clipped_height

	def IoU(box1, box2, return_max=True):
	    """Return an overlap score for two (x1, y1, x2, y2) boxes.

	    Args:
	        box1: First box as an indexable (x1, y1, x2, y2).
	        box2: Second box as an indexable (x1, y1, x2, y2).
	        return_max: If `True` (default), return the largest of the plain
	            intersection-over-union and the two "intersection over own area"
	            ratios, so a small box mostly covered by a large one still scores
	            high. If `False`, return the plain intersection-over-union.

	    Returns:
	        The overlap score; 0 when the union has no area (for example two
	        degenerate boxes), instead of dividing by zero.
	    """
	    intersection = intersection_area(box1, box2)
	    area1 = box_area(box1)
	    area2 = box_area(box2)
	    union = area1 + area2 - intersection

	    # Nothing to normalise by: the boxes cannot overlap meaningfully.
	    if union <= 0:
	        return 0

	    iou = intersection / union
	    if not return_max:
	        return iou

	    if area1 > 0 and area2 > 0:
	        ratio1 = intersection / area1
	        ratio2 = intersection / area2
	    else:
	        ratio1, ratio2 = 0, 0
	    return max(iou, ratio1, ratio2)


	def get_optimal_label_pos(text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size):
	    """Choose where to draw a label for the box (x1, y1, x2, y2).

	    Four placements are tried in order: 'top left', 'outer left',
	    'outer right', 'top right'. A placement is rejected when its background
	    rectangle scores above 0.3 against any detection box (using `IoU`), or
	    when it extends beyond the image. The first accepted placement is
	    returned; if all four are rejected, the last one ('top right') is
	    returned anyway.

	    Args:
	        text_padding: Padding between the text and its background edge.
	        text_width: Width of the rendered text.
	        text_height: Height of the rendered text.
	        x1, y1, x2, y2: Corners of the detection box being labelled.
	        detections: Object supporting `len()` and `.xyxy[i]` for every
	            detection box in the image.
	        image_size: `(width, height)` of the image, or `None` to skip the
	            out-of-image check.

	    Returns:
	        Tuple `(text_x, text_y, text_background_x1, text_background_y1,
	        text_background_x2, text_background_y2)`.
	    """
	    overlap_threshold = 0.3

	    def get_is_overlap(background_x1, background_y1, background_x2, background_y2):
	        # Reject placements that leave the image; skipped when the size is unknown.
	        if image_size is not None and (
	            background_x1 < 0
	            or background_x2 > image_size[0]
	            or background_y1 < 0
	            or background_y2 > image_size[1]
	        ):
	            return True
	        background = [background_x1, background_y1, background_x2, background_y2]
	        for i in range(len(detections)):
	            detection = detections.xyxy[i].astype(int)
	            if IoU(background, detection) > overlap_threshold:
	                return True
	        return False

	    # Each candidate: (text_x, text_y, bg_x1, bg_y1, bg_x2, bg_y2), in priority order.
	    candidates = (
	        # 'top left': above the box, aligned to its left edge
	        (
	            x1 + text_padding,
	            y1 - text_padding,
	            x1,
	            y1 - 2 * text_padding - text_height,
	            x1 + 2 * text_padding + text_width,
	            y1,
	        ),
	        # 'outer left': left of the box, aligned to its top edge
	        (
	            x1 - text_padding - text_width,
	            y1 + text_padding + text_height,
	            x1 - 2 * text_padding - text_width,
	            y1,
	            x1,
	            y1 + 2 * text_padding + text_height,
	        ),
	        # 'outer right': right of the box, aligned to its top edge
	        (
	            x2 + text_padding,
	            y1 + text_padding + text_height,
	            x2,
	            y1,
	            x2 + 2 * text_padding + text_width,
	            y1 + 2 * text_padding + text_height,
	        ),
	        # 'top right': above the box, aligned to its right edge
	        (
	            x2 - text_padding - text_width,
	            y1 - text_padding,
	            x2 - 2 * text_padding - text_width,
	            y1 - 2 * text_padding - text_height,
	            x2,
	            y1,
	        ),
	    )

	    for candidate in candidates:
	        if not get_is_overlap(*candidate[2:]):
	            return candidate

	    # Every placement collides with something: fall back to the last one tried.
	    return candidates[-1]