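# NOTE: "Spaces: Runtime error / Runtime error" -- this appears to be page-scrape
# residue captured together with the file; it is not part of the module.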
import math
import random

import cv2
import numpy as np

__all__ = ["EASTProcessTrain"]
class EASTProcessTrain(object):
    """Training-time sample builder for EAST-style text detection.

    Calling an instance on a sample dict randomly rotates, rescales and crops
    the image together with its text quadrilaterals, then produces the score
    map, the 9-channel geometry map and the training mask for that crop.
    """

    def __init__(
        self,
        image_shape=(512, 512),
        background_ratio=0.125,
        min_crop_side_ratio=0.1,
        min_text_size=10,
        **kwargs
    ):
        """
        :param image_shape: two-element sequence; only ``image_shape[1]`` is
            used, as the side length of the square network input
        :param background_ratio: probability of producing a text-free crop
        :param min_crop_side_ratio: minimum crop side, as a fraction of the
            corresponding image side
        :param min_text_size: quads whose shorter side is below this many
            pixels are masked out of training
        :param kwargs: accepted and ignored
        """
        self.input_size = image_shape[1]
        # Candidate factors for the random rescale applied in ``__call__``.
        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
        self.background_ratio = background_ratio
        self.min_crop_side_ratio = min_crop_side_ratio
        self.min_text_size = min_text_size

    def preprocess(self, im):
        """
        Resize, normalise and zero-pad an H x W x 3 image to the network input.

        The longer side is scaled to ``self.input_size``; the result is placed
        in the top-left corner of a square zero canvas.

        :param im: H x W x 3 image array
        :return: ``(im_padded, im_scale)`` where ``im_padded`` is float32 with
            shape ``(1, 3, input_size, input_size)`` and ``im_scale`` is the
            resize factor that was applied
        """
        input_size = self.input_size
        im_size_max = np.max(im.shape[0:2])
        im_scale = float(input_size) / float(im_size_max)
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)

        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        # No channel reordering is done here; the statistics are applied to
        # the channels in the order the caller delivers them.
        im = im / 255
        im -= img_mean
        im /= img_std

        new_h, new_w, _ = im.shape
        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
        im_padded[:new_h, :new_w, :] = im
        # HWC -> CHW, then add the leading batch axis.
        im_padded = im_padded.transpose((2, 0, 1))
        im_padded = im_padded[np.newaxis, :]
        return im_padded, im_scale

    def rotate_im_poly(self, im, text_polys):
        """
        Rotate the image by a random multiple of 90 degrees (one, two or three
        ``np.rot90`` turns) and map the quads into the rotated frame.

        :param im: H x W x C image array
        :param text_polys: array of quads, shape (N, 4, 2), (x, y) per corner
        :return: ``(rotated_image, rotated_polys)``; the polys are float32
        """
        im_w, im_h = im.shape[1], im.shape[0]

        rand_degree_ratio = np.random.rand()
        rand_degree_cnt = 1
        if 0.333 < rand_degree_ratio < 0.666:
            rand_degree_cnt = 2
        elif rand_degree_ratio > 0.666:
            rand_degree_cnt = 3

        # ``rot90`` returns a strided view; materialise it so the result
        # neither aliases the input nor reaches OpenCV as a non-contiguous
        # array.
        dst_im = np.ascontiguousarray(np.rot90(im, k=rand_degree_cnt))

        if text_polys.shape[0] == 0:
            return dst_im, np.zeros((0, 4, 2), dtype=np.float32)

        rot_angle = -90 * rand_degree_cnt * math.pi / 180.0
        cos_a, sin_a = math.cos(rot_angle), math.sin(rot_angle)
        # Rotate about the old image centre, then re-centre on the new one
        # (width and height swap for odd numbers of quarter turns).
        cx, cy = 0.5 * im_w, 0.5 * im_h
        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]

        corners = np.asarray(text_polys, dtype=np.float64)[:, :4, :2]
        off_x = corners[:, :, 0] - cx
        off_y = corners[:, :, 1] - cy
        dst_x = cos_a * off_x - sin_a * off_y + ncx
        dst_y = sin_a * off_x + cos_a * off_y + ncy
        dst_polys = np.stack([dst_x, dst_y], axis=-1).astype(np.float32)
        return dst_im, dst_polys

    def polygon_area(self, poly):
        """
        Compute the signed area of a quadrilateral (trapezoid formula).

        The sign encodes the vertex winding; ``check_and_validate_polys``
        treats a positive value as "wrong direction".

        :param poly: four (x, y) corners
        :return: signed area
        """
        edge = [
            (poly[(i + 1) % 4][0] - poly[i][0]) * (poly[(i + 1) % 4][1] + poly[i][1])
            for i in range(4)
        ]
        return np.sum(edge) / 2.0

    def check_and_validate_polys(self, polys, tags, img_height, img_width):
        """
        Give all quads the same winding direction and drop degenerate ones.

        Coordinates are clipped to the image bounds in place on ``polys``.
        Quads with ``|area| < 1`` are discarded. Quads found in the reversed
        direction have their corner order flipped and are marked as ignored.

        :param polys: array of quads, shape (N, 4, 2)
        :param tags: per-quad ignore flags
        :param img_height: image height used for clipping
        :param img_width: image width used for clipping
        :return: ``(validated_polys, validated_tags)`` as arrays
        """
        h, w = img_height, img_width
        if polys.shape[0] == 0:
            # Always return a pair: every caller unpacks two values.
            return polys, tags
        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)

        validated_polys = []
        validated_tags = []
        for poly, tag in zip(polys, tags):
            p_area = self.polygon_area(poly)
            if abs(p_area) < 1:
                # invalid (degenerate) poly
                continue
            if p_area > 0:
                # poly in wrong direction: such cases are ignored in training
                tag = True
                poly = poly[(0, 3, 2, 1), :]
            validated_polys.append(poly)
            validated_tags.append(tag)
        return np.array(validated_polys), np.array(validated_tags)

    def draw_img_polys(self, img, polys):
        """
        Debug helper: draw the quads on the image and write it to disk.

        Writes ``tmp.jpg`` and ``tmp_<k>.jpg`` (k random in 0..100) in the
        current working directory.

        :param img: image as HWC, CHW or 1 x C x H x W
        :param polys: iterable of quads (arrays of (x, y) corners)
        :return: None
        """
        if len(img.shape) == 4:
            img = np.squeeze(img, axis=0)
        if img.shape[0] == 3:
            img = img.transpose((1, 2, 0))
        # squeeze/transpose return views; copy so the offsets below do not
        # modify the caller's array.
        img = img.copy()
        img[:, :, 2] += 123.68
        img[:, :, 1] += 116.78
        img[:, :, 0] += 103.94
        # Round-trip through a file to obtain an image OpenCV can draw on.
        cv2.imwrite("tmp.jpg", img)
        img = cv2.imread("tmp.jpg")
        for box in polys:
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
        ino = random.randint(0, 100)
        cv2.imwrite("tmp_%d.jpg" % ino, img)
        return

    @staticmethod
    def _shrink_edge(poly, r, a, b, ratio, transposed):
        """Move corners ``a`` and ``b`` toward each other along edge a->b.

        Corner ``a`` moves by ``ratio * r[a]`` and corner ``b`` by
        ``ratio * r[b]``. ``transposed`` selects which of the two equivalent
        angle parameterisations is used (arctan2(dx, dy) vs arctan2(dy, dx)),
        so that the floating-point arithmetic matches each edge's original
        formula exactly.
        """
        dx = poly[b][0] - poly[a][0]
        dy = poly[b][1] - poly[a][1]
        if transposed:
            theta = np.arctan2(dx, dy)
            ux, uy = np.sin(theta), np.cos(theta)
        else:
            theta = np.arctan2(dy, dx)
            ux, uy = np.cos(theta), np.sin(theta)
        poly[a][0] += ratio * r[a] * ux
        poly[a][1] += ratio * r[a] * uy
        poly[b][0] -= ratio * r[b] * ux
        poly[b][1] -= ratio * r[b] * uy

    def shrink_poly(self, poly, r):
        """
        fit a poly inside the origin poly, maybe bugs here...
        used for generate the score map

        The longer pair of opposite edges is shrunk first, then the shorter
        pair. ``poly`` is modified in place and also returned.

        :param poly: the text poly, four (x, y) corners
        :param r: r in the paper; one reference length per corner
        :return: the shrinked poly
        """
        # shrink ratio
        R = 0.3
        # find the longer pair
        dist0 = np.linalg.norm(poly[0] - poly[1])
        dist1 = np.linalg.norm(poly[2] - poly[3])
        dist2 = np.linalg.norm(poly[0] - poly[3])
        dist3 = np.linalg.norm(poly[1] - poly[2])

        # (a, b, transposed) for edges p0-p1 / p3-p2 and p0-p3 / p1-p2.
        pair_01_32 = ((0, 1, False), (3, 2, False))
        pair_03_12 = ((0, 3, True), (1, 2, True))
        if dist0 + dist1 > dist2 + dist3:
            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
            schedule = pair_01_32 + pair_03_12
        else:
            # first move (p0, p3), (p1, p2), then (p0, p1), (p2, p3)
            schedule = pair_03_12 + pair_01_32
        # Order matters: each step reads corner positions already moved by
        # the previous steps.
        for a, b, transposed in schedule:
            self._shrink_edge(poly, r, a, b, R, transposed)
        return poly

    def generate_quad(self, im_size, polys, tags):
        """
        Generate quadrangle.

        :param im_size: ``(h, w)`` of the target maps
        :param polys: array of quads, shape (N, 4, 2), in map coordinates
        :param tags: per-quad ignore flags
        :return: ``(score_map, geo_map, training_mask)``: ``score_map`` is
            uint8 (h, w) with 1 inside each shrunk quad; ``geo_map`` is
            float32 (h, w, 9) holding, per pixel inside a shrunk quad, the
            offsets to the four corners plus ``1 / short_side``;
            ``training_mask`` is uint8 (h, w) with 0 over quads that are
            ignored or too small
        """
        h, w = im_size
        score_map = np.zeros((h, w), dtype=np.uint8)
        # (x1, y1, ..., x4, y4, short_edge_norm)
        geo_map = np.zeros((h, w, 9), dtype=np.float32)
        # mask used during training, to ignore some hard areas
        training_mask = np.ones((h, w), dtype=np.uint8)

        for poly, tag in zip(polys, tags):
            # Per-corner reference length: the shorter of its two edges.
            r = [
                min(
                    np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                    np.linalg.norm(poly[i] - poly[(i - 1) % 4]),
                )
                for i in range(4)
            ]
            # score map
            shrinked_poly = self.shrink_poly(poly.copy(), r).astype(np.int32)[
                np.newaxis, :, :
            ]
            cv2.fillPoly(score_map, shrinked_poly, 1)
            # A fresh binary mask per quad: a shared uint8 id mask cannot
            # represent polygon ids above 255.
            poly_mask = np.zeros((h, w), dtype=np.uint8)
            cv2.fillPoly(poly_mask, shrinked_poly, 1)

            poly_h = min(
                np.linalg.norm(poly[0] - poly[3]), np.linalg.norm(poly[1] - poly[2])
            )
            poly_w = min(
                np.linalg.norm(poly[0] - poly[1]), np.linalg.norm(poly[2] - poly[3])
            )
            # if the poly is too small or flagged, ignore it during training
            if tag or min(poly_h, poly_w) < self.min_text_size:
                cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)

            xy_in_poly = np.argwhere(poly_mask == 1)
            # geo map.
            y_in_poly = xy_in_poly[:, 0]
            x_in_poly = xy_in_poly[:, 1]
            # Clip a local copy so the caller's polygon array is not altered.
            clipped = poly.copy()
            clipped[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
            clipped[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
            for pno in range(4):
                geo_channel_beg = pno * 2
                geo_map[y_in_poly, x_in_poly, geo_channel_beg] = (
                    x_in_poly - clipped[pno, 0]
                )
                geo_map[y_in_poly, x_in_poly, geo_channel_beg + 1] = (
                    y_in_poly - clipped[pno, 1]
                )
            geo_map[y_in_poly, x_in_poly, 8] = 1.0 / max(min(poly_h, poly_w), 1.0)
        return score_map, geo_map, training_mask

    def crop_area(self, im, polys, tags, crop_background=False, max_tries=50):
        """
        make random crop from the input image

        Crop borders are drawn only from rows/columns not spanned by any
        quad, so a border never cuts through text.

        :param im: H x W x C image array
        :param polys: array of quads, shape (N, 4, 2)
        :param tags: array of per-quad ignore flags
        :param crop_background: if True, return the first text-free crop
            (with empty lists for polys and tags); if False, return the first
            crop that fully contains at least one quad
        :param max_tries: number of random attempts
        :return: ``(im, polys, tags)``; the inputs are returned unchanged when
            no suitable crop is found
        """
        h, w, _ = im.shape
        pad_h = h // 10
        pad_w = w // 10
        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
        for poly in polys:
            poly = np.round(poly, decimals=0).astype(np.int32)
            minx = np.min(poly[:, 0])
            maxx = np.max(poly[:, 0])
            miny = np.min(poly[:, 1])
            maxy = np.max(poly[:, 1])
            # Clamp at 0: a negative slice bound would wrap around to the far
            # end of the array instead of marking the quad's extent.
            w_array[max(minx + pad_w, 0) : max(maxx + pad_w, 0)] = 1
            h_array[max(miny + pad_h, 0) : max(maxy + pad_h, 0)] = 1

        # ensure the cropped area not across a text
        h_axis = np.where(h_array == 0)[0]
        w_axis = np.where(w_array == 0)[0]
        if len(h_axis) == 0 or len(w_axis) == 0:
            return im, polys, tags

        for _ in range(max_tries):
            xx = np.random.choice(w_axis, size=2)
            xmin = np.clip(np.min(xx) - pad_w, 0, w - 1)
            xmax = np.clip(np.max(xx) - pad_w, 0, w - 1)
            yy = np.random.choice(h_axis, size=2)
            ymin = np.clip(np.min(yy) - pad_h, 0, h - 1)
            ymax = np.clip(np.max(yy) - pad_h, 0, h - 1)
            if (
                xmax - xmin < self.min_crop_side_ratio * w
                or ymax - ymin < self.min_crop_side_ratio * h
            ):
                # area too small
                continue

            if polys.shape[0] != 0:
                poly_axis_in_area = (
                    (polys[:, :, 0] >= xmin)
                    & (polys[:, :, 0] <= xmax)
                    & (polys[:, :, 1] >= ymin)
                    & (polys[:, :, 1] <= ymax)
                )
                # Keep only quads with all four corners inside the crop.
                selected_polys = np.where(np.sum(poly_axis_in_area, axis=1) == 4)[0]
            else:
                selected_polys = []

            if len(selected_polys) == 0:
                # no text in this area
                if crop_background:
                    im = im[ymin : ymax + 1, xmin : xmax + 1, :]
                    return im, [], []
                continue

            im = im[ymin : ymax + 1, xmin : xmax + 1, :]
            # Fancy indexing copies, so the shift below leaves the input
            # array untouched.
            polys = polys[selected_polys]
            tags = tags[selected_polys]
            polys[:, :, 0] -= xmin
            polys[:, :, 1] -= ymin
            return im, polys, tags
        return im, polys, tags

    def crop_background_infor(self, im, text_polys, text_tags):
        """
        Build a text-free (background) training sample.

        :return: ``(im, score_map, geo_map, training_mask)`` with all-zero
            score/geometry maps and an all-one mask, or None when no text-free
            crop could be found
        """
        im, text_polys, text_tags = self.crop_area(
            im, text_polys, text_tags, crop_background=True
        )
        if len(text_polys) > 0:
            # crop_area fell back to the uncropped input, which has text.
            return None
        # pad and resize image
        input_size = self.input_size
        im, _ = self.preprocess(im)
        score_map = np.zeros((input_size, input_size), dtype=np.float32)
        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
        training_mask = np.ones((input_size, input_size), dtype=np.float32)
        return im, score_map, geo_map, training_mask

    def crop_foreground_infor(self, im, text_polys, text_tags):
        """
        Build a training sample from a crop that contains text.

        :return: ``(im, score_map, geo_map, training_mask)``, or None when the
            crop has no quads or every quad in it is flagged as ignored
        """
        im, text_polys, text_tags = self.crop_area(
            im, text_polys, text_tags, crop_background=False
        )
        if text_polys.shape[0] == 0:
            return None
        # continue for all ignore case
        if np.sum((text_tags * 1.0)) >= text_tags.size:
            return None
        # pad and resize image
        im, ratio = self.preprocess(im)
        # Bring the quads to the same scale as the resized image.
        text_polys[:, :, 0] *= ratio
        text_polys[:, :, 1] *= ratio
        _, _, new_h, new_w = im.shape
        score_map, geo_map, training_mask = self.generate_quad(
            (new_h, new_w), text_polys, text_tags
        )
        return im, score_map, geo_map, training_mask

    def __call__(self, data):
        """
        Augment one sample and attach its EAST training targets.

        Reads ``data["image"]``, ``data["polys"]`` and ``data["ignore_tags"]``;
        on success overwrites ``data["image"]`` and adds ``data["score_map"]``,
        ``data["geo_map"]`` and ``data["training_mask"]`` (all float32, the
        three maps subsampled with stride 4).

        :param data: sample dict
        :return: the updated ``data``, or None when the sample is unusable
            (no image, no valid quads, or no acceptable crop)
        """
        im = data["image"]
        text_polys = data["polys"]
        text_tags = data["ignore_tags"]
        if im is None:
            return None
        if text_polys.shape[0] == 0:
            return None

        # Private float32 copy: the steps below clip and scale in place, which
        # would otherwise alter the caller's array and fail on integer dtypes.
        text_polys = np.array(text_polys, dtype=np.float32)

        # add rotate cases
        if np.random.rand() < 0.5:
            im, text_polys = self.rotate_im_poly(im, text_polys)
        h, w, _ = im.shape
        text_polys, text_tags = self.check_and_validate_polys(
            text_polys, text_tags, h, w
        )
        if text_polys.shape[0] == 0:
            return None

        # random scale this image
        rd_scale = np.random.choice(self.random_scale)
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        text_polys *= rd_scale

        if np.random.rand() < self.background_ratio:
            outs = self.crop_background_infor(im, text_polys, text_tags)
        else:
            outs = self.crop_foreground_infor(im, text_polys, text_tags)
        if outs is None:
            return None

        im, score_map, geo_map, training_mask = outs
        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
        # (h, w, 9) -> (9, h, w), then stride-4 subsample.
        geo_map = geo_map.transpose((2, 0, 1))
        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
        training_mask = training_mask[np.newaxis, ::4, ::4]
        training_mask = training_mask.astype(np.float32)

        data["image"] = im[0]
        data["score_map"] = score_map
        data["geo_map"] = geo_map
        data["training_mask"] = training_mask
        return data