Spaces:
Runtime error
Runtime error
from __future__ import absolute_import, division, print_function, unicode_literals | |
import random | |
import cv2 | |
import numpy as np | |
def is_poly_in_rect(poly, x, y, w, h):
    """Return True when every vertex of ``poly`` lies inside the rectangle.

    The rectangle spans ``[x, x + w]`` horizontally and ``[y, y + h]``
    vertically; vertices lying exactly on the border count as inside.

    Args:
        poly: sequence of ``(x, y)`` vertices (anything ``np.array`` turns
            into a 2-D array whose columns 0 and 1 are x and y).
        x, y: top-left corner of the rectangle.
        w, h: width and height of the rectangle.
    """
    vertices = np.array(poly)
    # Check the x column first, then the y column, bailing out as soon as
    # one of them sticks out of its allowed interval.
    for column, start, extent in ((0, x, w), (1, y, h)):
        coords = vertices[:, column]
        if coords.min() < start or coords.max() > start + extent:
            return False
    return True
def is_poly_outside_rect(poly, x, y, w, h):
    """Return True when the bounding box of ``poly`` misses the rectangle.

    The polygon counts as outside when its extent along x lies strictly
    left or strictly right of ``[x, x + w]``, or its extent along y lies
    strictly above or strictly below ``[y, y + h]``. Merely touching the
    border is not "outside".

    Args:
        poly: sequence of ``(x, y)`` vertices (anything ``np.array`` turns
            into a 2-D array whose columns 0 and 1 are x and y).
        x, y: top-left corner of the rectangle.
        w, h: width and height of the rectangle.
    """
    vertices = np.array(poly)
    # A gap along either axis is enough to separate the two shapes.
    for column, start, extent in ((0, x, w), (1, y, h)):
        coords = vertices[:, column]
        if coords.max() < start or coords.min() > start + extent:
            return True
    return False
def split_regions(axis):
    """Split an index array into runs of consecutive values.

    A new run starts wherever an element is not exactly one greater than
    its predecessor.

    Args:
        axis: 1-D array of integer indices.

    Returns:
        A list of 1-D arrays, one per run, in their original order. An
        empty input yields an empty list.
    """
    axis = np.asarray(axis)
    if axis.size == 0:
        return []
    # Positions where a new run begins (the element after each gap).
    run_starts = np.flatnonzero(np.diff(axis) != 1) + 1
    # np.split also emits the final run, which the previous hand-written
    # loop never appended (it only flushed a run when the *next* gap was
    # found, so the trailing region was silently lost).
    return np.split(axis, run_starts)
def random_select(axis, max_size):
    """Draw two random values from ``axis`` and return them ordered.

    Both values are sampled (with replacement) via ``np.random.choice`` and
    then clamped to ``[0, max_size - 1]``.

    Args:
        axis: 1-D array of candidate coordinates.
        max_size: exclusive upper bound used for clamping.

    Returns:
        ``(low, high)`` with ``low <= high``.
    """
    upper = max_size - 1
    pair = np.random.choice(axis, size=2)
    low = np.clip(pair.min(), 0, upper)
    high = np.clip(pair.max(), 0, upper)
    return low, high
def region_wise_random_select(regions, max_size):
    """Pick one random coordinate from each of two randomly chosen regions.

    Two region indices are drawn with ``np.random.choice`` (so the same
    region may be drawn twice), then one value is drawn from each selected
    region.

    Args:
        regions: non-empty list of 1-D arrays of candidate coordinates.
        max_size: unused; kept so the signature parallels ``random_select``.

    Returns:
        ``(low, high)`` as plain Python ints with ``low <= high``.
    """
    picks = []
    for region_index in np.random.choice(len(regions), 2):
        # Draw a scalar directly. The previous ``size=1`` form produced a
        # one-element array, and calling int() on such an array is
        # deprecated since NumPy 1.25.
        picks.append(int(np.random.choice(regions[region_index])))
    return min(picks), max(picks)
def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    """Choose a random crop rectangle whose edges avoid cutting through text.

    The x and y extents of every polygon are projected onto the image axes;
    crop borders are then sampled only from columns/rows not covered by any
    projection. A candidate is accepted when both sides are at least
    ``min_crop_side_ratio`` of the image side and at least one polygon is
    not entirely outside it.

    Args:
        im: image array with shape ``(height, width, channels)``.
        text_polys: iterable of polygons, each an ``(N, 2)`` array-like of
            ``(x, y)`` vertices.
        min_crop_side_ratio: minimum crop side as a fraction of the image
            side.
        max_tries: number of random candidates to try.

    Returns:
        ``(x, y, w, h)`` of the crop. The whole image ``(0, 0, w, h)`` is
        returned when no free row/column exists or no candidate is accepted
        within ``max_tries`` attempts.
    """
    h, w, _ = im.shape
    h_array = np.zeros(h, dtype=np.int32)
    w_array = np.zeros(w, dtype=np.int32)
    for points in text_polys:
        points = np.round(points, decimals=0).astype(np.int32)
        # Clamp the extents to the image. Without this, a negative bound is
        # interpreted by the slice as a from-the-end index, so polygons that
        # overhang the image would mark the wrong pixels (or none at all).
        minx, maxx = np.clip((points[:, 0].min(), points[:, 0].max()), 0, w)
        miny, maxy = np.clip((points[:, 1].min(), points[:, 1].max()), 0, h)
        w_array[minx:maxx] = 1
        h_array[miny:maxy] = 1

    # Candidate border positions: rows/columns not covered by any text, so
    # the crop edge never runs across a text instance.
    h_axis = np.where(h_array == 0)[0]
    w_axis = np.where(w_array == 0)[0]
    if len(h_axis) == 0 or len(w_axis) == 0:
        return 0, 0, w, h

    h_regions = split_regions(h_axis)
    w_regions = split_regions(w_axis)

    for _ in range(max_tries):
        if len(w_regions) > 1:
            xmin, xmax = region_wise_random_select(w_regions, w)
        else:
            xmin, xmax = random_select(w_axis, w)
        if len(h_regions) > 1:
            ymin, ymax = region_wise_random_select(h_regions, h)
        else:
            ymin, ymax = random_select(h_axis, h)

        crop_w = xmax - xmin
        crop_h = ymax - ymin
        if crop_w < min_crop_side_ratio * w or crop_h < min_crop_side_ratio * h:
            # Area too small.
            continue

        # Accept as soon as one polygon is not entirely outside the crop.
        if any(
            not is_poly_outside_rect(poly, xmin, ymin, crop_w, crop_h)
            for poly in text_polys
        ):
            return xmin, ymin, crop_w, crop_h

    return 0, 0, w, h
class EastRandomCropData(object):
    """Randomly crop a sample around its text and resize it to a fixed size.

    The transform reads ``data["image"]``, ``data["polys"]``,
    ``data["ignore_tags"]`` and ``data["texts"]``, and writes the cropped
    and resized versions back to the same keys.

    Args:
        size: output size as ``(width, height)``.
        max_tries: number of random crop candidates passed to ``crop_area``.
        min_crop_side_ratio: minimum crop side as a fraction of the image
            side, passed to ``crop_area``.
        keep_ratio: when True the crop is scaled uniformly and placed in the
            top-left corner of a zero-filled canvas; when False the crop is
            stretched to exactly ``size``.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        size=(640, 640),
        max_tries=10,
        min_crop_side_ratio=0.1,
        keep_ratio=True,
        **kwargs
    ):
        self.size = size
        self.max_tries = max_tries
        self.min_crop_side_ratio = min_crop_side_ratio
        self.keep_ratio = keep_ratio

    def __call__(self, data):
        """Crop ``data`` in place and return it."""
        img = data["image"]
        text_polys = data["polys"]
        ignore_tags = data["ignore_tags"]
        texts = data["texts"]
        # Only polygons that are not ignored steer the choice of crop.
        all_care_polys = [
            poly for poly, tag in zip(text_polys, ignore_tags) if not tag
        ]

        # Compute the crop region.
        crop_x, crop_y, crop_w, crop_h = crop_area(
            img, all_care_polys, self.min_crop_side_ratio, self.max_tries
        )
        cropped = img[crop_y : crop_y + crop_h, crop_x : crop_x + crop_w]

        scale_w = self.size[0] / crop_w
        scale_h = self.size[1] / crop_h
        if self.keep_ratio:
            # Crop the image, scale uniformly and pad the rest with zeros.
            scale = min(scale_w, scale_h)
            out_h = int(crop_h * scale)
            out_w = int(crop_w * scale)
            poly_scale = scale
            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]), img.dtype)
            padimg[:out_h, :out_w] = cv2.resize(cropped, (out_w, out_h))
            img = padimg
        else:
            # The image is stretched independently along each axis, so the
            # polygons must use the same per-axis factors and be tested
            # against the full output size. Previously they were scaled by
            # the single min() factor and drifted away from the text.
            out_w, out_h = self.size[0], self.size[1]
            poly_scale = (scale_w, scale_h)
            img = cv2.resize(cropped, tuple(self.size))

        # Crop the text boxes: move into crop coordinates, scale, and drop
        # those that end up entirely outside the visible area.
        text_polys_crop = []
        ignore_tags_crop = []
        texts_crop = []
        for poly, text, tag in zip(text_polys, texts, ignore_tags):
            poly = ((poly - (crop_x, crop_y)) * poly_scale).tolist()
            if not is_poly_outside_rect(poly, 0, 0, out_w, out_h):
                text_polys_crop.append(poly)
                ignore_tags_crop.append(tag)
                texts_crop.append(text)

        data["image"] = img
        data["polys"] = np.array(text_polys_crop)
        data["ignore_tags"] = ignore_tags_crop
        data["texts"] = texts_crop
        return data
class RandomCropImgMask(object):
    """Crop the same random window out of several arrays of a sample.

    Args:
        size: crop size as ``(height, width)``.
        main_key: key of the mask in ``data`` used to steer the crop
            towards positive (``> 0``) pixels.
        crop_keys: keys of ``data`` whose arrays are cropped.
        p: the mask-guided crop is used only when ``random.random() > p``;
            otherwise the window is placed uniformly at random.
        **kwargs: accepted and ignored.
    """

    def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs):
        self.size = size
        self.main_key = main_key
        self.crop_keys = crop_keys
        self.p = p

    def __call__(self, data):
        """Crop the arrays under ``crop_keys`` in place and return ``data``.

        ``data`` is returned untouched when ``data["image"]`` already has
        the target height and width.
        """
        image = data["image"]
        h, w = image.shape[0:2]
        th, tw = self.size
        if w == tw and h == th:
            return data

        mask = data[self.main_key]
        if np.max(mask) > 0 and random.random() > self.p:
            # Make sure to crop the text region: derive the range of window
            # origins from the extent of the positive mask pixels.
            tl = np.min(np.where(mask > 0), axis=1) - (th, tw)
            tl[tl < 0] = 0
            br = np.max(np.where(mask > 0), axis=1) - (th, tw)
            br[br < 0] = 0
            # Keep the window inside the image.
            br[0] = min(br[0], h - th)
            br[1] = min(br[1], w - tw)

            i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            i = random.randint(0, h - th) if h - th > 0 else 0
            j = random.randint(0, w - tw) if w - tw > 0 else 0

        for k in data:
            if k not in self.crop_keys:
                continue
            value = data[k]
            if len(value.shape) == 3:
                # The shortest axis is treated as the channel axis.
                channel_axis = np.argmin(value.shape)
                if channel_axis == 0:
                    value = value[:, i : i + th, j : j + tw]
                elif channel_axis == 2:
                    value = value[i : i + th, j : j + tw, :]
                # Otherwise (shortest axis in the middle) leave it as is.
            else:
                value = value[i : i + th, j : j + tw]
            data[k] = value
        return data