# -*- coding: utf-8 -*-
"""
Functions for normalizing images of words and letters.

Main functions: word_normalization, letter_normalization,
image_standardization
"""
import math

import cv2
import numpy as np

from .helpers import *


def image_standardization(image):
    """Scale an image to zero mean and unit variance.

    Should result in the same output as
    tf.image.per_image_standardization: the divisor is the standard
    deviation, floored at 1/sqrt(number of elements) so that a uniform
    image does not divide by zero.
    """
    min_std = 1.0 / math.sqrt(image.size)
    return (image - np.mean(image)) / max(np.std(image), min_std)


def _content_span(img, axis):
    """Return the [start, stop) range along `axis` that holds content.

    A row/column counts as content when it has more than one non-zero
    pixel.  When nothing qualifies the full extent is returned, so the
    caller never crops to an empty image.
    """
    other_axes = tuple(a for a in range(img.ndim) if a != axis)
    counts = np.count_nonzero(img, axis=other_axes)
    hits = np.flatnonzero(counts > 1)
    if hits.size == 0:
        return 0, img.shape[axis]
    return int(hits[0]), int(hits[-1]) + 1


def _crop_add_border(img, height, threshold=50, border=True, border_size=15):
    """Crop a word/letter image to its content and optionally pad it.

    Args:
        img: image to crop; pixels at or below `threshold` are zeroed first.
        height: if non-zero, the crop is passed to the `resize` helper
            with this height; 0 keeps the cropped size.
        threshold: cut-off used to clear small values before cropping.
        border: when true, add black padding on the left and right.
        border_size: width in pixels of each side padding.

    Returns:
        The cropped (and possibly resized / padded) image.
    """
    # Clear small values
    _, img = cv2.threshold(img, threshold, 255, cv2.THRESH_TOZERO)

    y0, y1 = _content_span(img, 0)
    x0, x1 = _content_span(img, 1)
    img = img[y0:y1, x0:x1]

    if height != 0:
        img = resize(img, height, True)

    if border:
        # Pad only horizontally: top and bottom borders are 0.
        return cv2.copyMakeBorder(img, 0, 0, border_size, border_size,
                                  cv2.BORDER_CONSTANT,
                                  value=[0, 0, 0])
    return img


def _is_tilt_angle(theta):
    """Tell whether a Hough angle (radians) is within the accepted tilt range."""
    return theta < 0.7 or theta > 2.6


def _word_tilt(img, height, border=True, border_size=15):
    """Detect the slant angle of the word and tilt the image to undo it.

    Lines are found with a Hough transform on the Canny edges; only
    lines whose angle is below 0.7 or above 2.6 rad are considered.
    The result is always cropped (and optionally bordered) afterwards.
    """
    edges = cv2.Canny(img, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 30)

    if lines is not None:
        angles = [line[0][1] for line in lines if _is_tilt_angle(line[0][1])]

        # Set min number of valid lines (try higher)
        mean_angle = np.mean(angles) if len(angles) > 1 else 0

        # NOTE(review): angles close to 0 and close to pi are averaged
        # together, so a mix of both lands near pi/2 and is rejected
        # below.  Kept as is to preserve the existing output.
        # Look for angle with correct value
        if mean_angle != 0 and _is_tilt_angle(mean_angle):
            img = _tilt_by_angle(img, mean_angle, height)

    return _crop_add_border(img, height, 50, border, border_size)


def _tilt_by_angle(img, angle, height):
    """Tilt the image by the given angle.

    The bottom edge is shifted horizontally by tan(angle) * height
    relative to the top edge, and the output is widened by that amount.
    """
    dist = np.tan(angle) * height
    width = len(img[0])
    src_points = np.float32([[0, 0], [0, height],
                             [width, height], [width, 0]])

    # Dist is positive for angle < 0.7; negative for angle > 2.6
    # Image must be shifted to the right
    if dist > 0:
        dst_points = np.float32([[0, 0], [dist, height],
                                 [width + dist, height], [width, 0]])
    else:
        dst_points = np.float32([[-dist, 0], [0, height],
                                 [width, height], [width - dist, 0]])

    transform = cv2.getPerspectiveTransform(src_points, dst_points)
    return cv2.warpPerspective(img, transform,
                               (int(width + abs(dist)), height))


def _sobel_detect(channel):
    """Return the Sobel gradient magnitude of a channel as uint8.

    Magnitudes above 255 are clipped to 255.
    """
    sobel_x = cv2.Sobel(channel, cv2.CV_16S, 1, 0)
    sobel_y = cv2.Sobel(channel, cv2.CV_16S, 0, 1)
    # Combine x, y gradient magnitudes sqrt(x^2 + y^2)
    sobel = np.hypot(sobel_x, sobel_y)
    sobel[sobel > 255] = 255
    return np.uint8(sobel)


class HysterThresh:
    """Hysteresis thresholding of a single-channel image.

    The input is inverted and stretched to 0..255.  Both thresholds are
    derived from the most frequent intensity (the histogram peak):
    `high` = peak + 65 and `low` = peak + 45.  Pixels >= `high` are
    kept as 255; pixels in [low, high) are kept only when 8-connected
    (through other such pixels) to a pixel >= `high`.
    """

    def __init__(self, img):
        # Work in float so the normalisation is exact for any input dtype.
        inverted = 255.0 - np.asarray(img, dtype=np.float64)
        lowest = np.min(inverted)
        span = np.max(inverted) - lowest
        if span > 0:
            img = (inverted - lowest) / span * 255
        else:
            # Constant image: avoid 0/0 (NaN); there is nothing to keep.
            img = np.zeros(inverted.shape, dtype=np.float64)

        hist, _ = np.histogram(img.ravel(), 256, [0, 256])
        peak = np.argmax(hist)

        self.high = peak + 65
        self.low = peak + 45
        # Offset added to kept weak pixels (maps [low, high) to [235, 255)).
        self.diff = 255 - self.high

        self.img = img
        self.im = np.zeros(img.shape, dtype=img.dtype)

    def get_image(self):
        """Run the thresholding and return the result as a uint8 image."""
        self._hyster()
        return np.uint8(self.im)

    def _hyster_rec(self, r, c):
        """Flood-fill weak pixels reachable from (r, c).

        Uses an explicit stack instead of recursion so that large
        connected regions cannot exceed the interpreter recursion limit.
        """
        h, w = self.img.shape
        pending = [(int(r), int(c))]
        while pending:
            cur_r, cur_c = pending.pop()
            for ri in range(max(cur_r - 1, 0), min(cur_r + 2, h)):
                for ci in range(max(cur_c - 1, 0), min(cur_c + 2, w)):
                    if (self.im[ri, ci] == 0
                            and self.low <= self.img[ri, ci] < self.high):
                        # Marking the pixel non-zero also flags it as visited.
                        self.im[ri, ci] = self.img[ri, ci] + self.diff
                        pending.append((ri, ci))

    def _hyster(self):
        """Mark every strong pixel and grow the result from each of them."""
        strong = self.img >= self.high
        self.im[strong] = 255
        self.img[strong] = 255
        for ri, ci in np.argwhere(strong):
            self._hyster_rec(ri, ci)


def _hyst_word_norm(image):
    """Word normalization using hysteresis thresholding."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    img = cv2.bilateralFilter(gray, 10, 10, 30)
    return HysterThresh(img).get_image()


def word_normalization(image, height, border=True, tilt=True,
                       border_size=15, hyst_norm=False):
    """Preprocess a word - resize, binarize, tilt word.

    Args:
        image: RGB image of a word.
        height: target height handed to the `resize` helper.
        border: add left/right padding to the result.
        tilt: detect and correct the slant of the word.
        border_size: width in pixels of each side padding.
        hyst_norm: use hysteresis thresholding instead of the default
            bilateral filter + min-max normalisation + threshold.

    Returns:
        The normalized single-channel word image.
    """
    image = resize(image, height, True)

    if hyst_norm:
        th = _hyst_word_norm(image)
    else:
        img = cv2.bilateralFilter(image, 10, 30, 30)
        gray = 255 - cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        norm = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
        _, th = cv2.threshold(norm, 50, 255, cv2.THRESH_TOZERO)

    if tilt:
        return _word_tilt(th, height, border, border_size)
    return _crop_add_border(th,
                            height=height,
                            border=border,
                            border_size=border_size)


def _resize_letter(img, size=56):
    """Resize the bigger side of the image to the given size.

    The other side is scaled by the same ratio and clamped to at least
    one pixel, because cv2.resize rejects a zero-sized target.
    """
    rows, cols = img.shape[0], img.shape[1]
    if rows > cols:
        ratio = size / rows
        return cv2.resize(img, (max(1, int(ratio * cols)), size))
    ratio = size / cols
    return cv2.resize(img, (size, max(1, int(ratio * rows))))


def letter_normalization(image, is_thresh=True, dim=False):
    """Preprocess a letter - crop, resize and centre it on a 64x64 canvas.

    Args:
        image: single-channel image of a letter.
        is_thresh: crop the image to its content first (threshold 80).
        dim: also return the shape of the cropped image.

    Returns:
        The 64x64 uint8 image, or (image, cropped_shape) when `dim`.
    """
    canvas_size = 64
    margin = 4

    if is_thresh and image.shape[0] > 0 and image.shape[1] > 0:
        image = _crop_add_border(image, height=0, threshold=80, border=False)

    rows, cols = image.shape[0], image.shape[1]
    resized = image
    # One-pixel-thin glyphs are normally pasted unscaled, but they must
    # still be shrunk when they would not fit on the canvas.
    too_long = rows > 0 and cols > 0 and max(rows, cols) > canvas_size - margin
    if (rows > 1 and cols > 1) or too_long:
        resized = _resize_letter(image)

    result = np.zeros((canvas_size, canvas_size), np.uint8)
    # Calculate offset for smaller size
    if rows > cols:
        offset = [int((result.shape[1] - resized.shape[1]) / 2), margin]
    else:
        offset = [margin, int((result.shape[0] - resized.shape[0]) / 2)]

    # Replace zeros by image (nothing to paste for an empty glyph)
    if resized.size:
        result[offset[1]:offset[1] + resized.shape[0],
               offset[0]:offset[0] + resized.shape[1]] = resized

    if dim:
        return result, image.shape
    return result