from typing import Tuple

import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_tensor

from .model import WPODNet

class Prediction:
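    """A single WPOD-Net detection: the source image, the four-corner bounding
    polygon in original image coordinates, and the detection confidence."""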
    def __init__(self, image: Image.Image, bounds: np.ndarray, confidence: float):
        self.image = image
        self.bounds = bounds
        self.confidence = confidence
    def _get_width_height(self) -> Tuple[int, int]:
        # Estimate the plate's width and height by averaging the two pairs of
        # opposite edge lengths, then pick the warp target size from the
        # aspect ratio (taller plates get the squarer 64x46 target).
        def distance(point1, point2):
            return np.sqrt((point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2)

        box = self.bounds
        dis1 = distance(box[0], box[1])
        dis2 = distance(box[1], box[2])
        dis3 = distance(box[2], box[3])
        dis4 = distance(box[3], box[0])
        width = (dis1 + dis3) / 2
        height = (dis2 + dis4) / 2

        if height / width > 0.49:
            return 64, 46
        return 100, 23
    def get_perspective_M(self, width: int, height: int) -> np.ndarray:
        # Get the 3x3 perspective matrix mapping the detected quadrilateral
        # onto an axis-aligned (width, height) rectangle
        src_points = np.array(self.bounds, dtype=np.float32)
        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
        return cv2.getPerspectiveTransform(src_points, dst_points)
    def annotate(self, outline: str = 'red', width: int = 3) -> Image.Image:
        # Draw the bounding polygon on a copy of the original image
        canvas = self.image.copy()
        drawer = ImageDraw.Draw(canvas)
        drawer.polygon(
            [(x, y) for x, y in self.bounds],
            outline=outline,
            width=width
        )
        return canvas
    def warp(self) -> np.ndarray:
        # Rectify the plate: choose a target size from the estimated aspect
        # ratio, then warp the bounding quadrilateral onto that rectangle
        width, height = self._get_width_height()
        M = self.get_perspective_M(width, height)
        n_image = np.array(self.image)
        warped = cv2.warpPerspective(n_image, M, (int(width), int(height)))
        return warped


class Predictor:
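    """Runs WPOD-Net over a PIL image and decodes the most confident
    anchor into a `Prediction`."""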
    # Canonical unit-square corners in homogeneous coordinates; the predicted
    # affine parameters map these onto the plate's bounding polygon.
    _q = np.array([
        [-.5, .5, .5, -.5],
        [-.5, -.5, .5, .5],
        [1., 1., 1., 1.]
    ])
    # Scale applied to the decoded polygon before normalization
    _scaling_const = 7.75
    # The network downsamples its input by a factor of 16
    _stride = 16
    def __init__(self, wpodnet: WPODNet):
        self.wpodnet = wpodnet
        self.wpodnet.eval()
    def _resize_to_fixed_ratio(self, image: Image.Image, dim_min: int, dim_max: int) -> Image.Image:
        h, w = image.height, image.width

        wh_ratio = max(h, w) / min(h, w)
        side = int(wh_ratio * dim_min)
        bound_dim = min(side + side % self._stride, dim_max)

        factor = bound_dim / max(h, w)
        reg_w, reg_h = int(w * factor), int(h * factor)

        # Ensure both width and height are multiples of `self._stride`
        reg_w_mod = reg_w % self._stride
        if reg_w_mod > 0:
            reg_w += self._stride - reg_w_mod

        reg_h_mod = reg_h % self._stride
        if reg_h_mod > 0:
            reg_h += self._stride - reg_h_mod

        return image.resize((reg_w, reg_h))
    def _to_torch_image(self, image: Image.Image) -> torch.Tensor:
        tensor = to_tensor(image)
        return tensor.unsqueeze_(0)  # add the batch dimension
    def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
        with torch.no_grad():
            probs, affines = self.wpodnet.forward(image)

        # Convert to squeezed numpy arrays
        # grid_w: the number of anchors per row
        # grid_h: the number of anchors per column
        probs = np.squeeze(probs.cpu().numpy())[0]   # (grid_h, grid_w)
        affines = np.squeeze(affines.cpu().numpy())  # (6, grid_h, grid_w)

        return probs, affines
    def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
        return np.unravel_index(probs.argmax(), probs.shape)
    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
        # Compute theta, clamping the diagonal (scale) terms to be non-negative
        theta = affines[:, anchor_y, anchor_x]
        theta = theta.reshape((2, 3))
        theta[0, 0] = max(theta[0, 0], 0.0)
        theta[1, 1] = max(theta[1, 1], 0.0)

        # Convert theta into the bounding polygon
        bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio

        # Normalize the bounds to [0, 1] relative to the feature-map grid
        _, grid_h, grid_w = affines.shape
        bounds[0] = (bounds[0] + anchor_x + .5) / grid_w
        bounds[1] = (bounds[1] + anchor_y + .5) / grid_h

        return np.transpose(bounds)
    def predict(self, image: Image.Image, scaling_ratio: float = 1.0, dim_min: int = 288, dim_max: int = 608) -> Prediction:
        orig_h, orig_w = image.height, image.width

        # Resize the image to a fixed ratio; this makes setting up the
        # anchors convenient
        resized = self._resize_to_fixed_ratio(image, dim_min=dim_min, dim_max=dim_max)
        resized = self._to_torch_image(resized)
        resized = resized.to(self.wpodnet.device)

        # Inference with WPODNet
        # probs: the probability distribution over license plate locations
        # affines: the predicted affine matrices
        probs, affines = self._inference(resized)

        # Get the theta with the maximum probability
        max_prob = np.amax(probs)
        anchor_y, anchor_x = self._get_max_anchor(probs)
        bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)

        # Scale the normalized bounds back to original image coordinates
        bounds[:, 0] *= orig_w
        bounds[:, 1] *= orig_h

        return Prediction(
            image=image,
            bounds=bounds.astype(np.int32),
            confidence=max_prob.item()
        )
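

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module. It assumes the
    # module is run from its package (e.g. `python -m ...`) so the relative
    # import of `WPODNet` resolves; the `WPODNet()` construction, the
    # checkpoint path `wpodnet.pth`, and the input `car.jpg` are placeholders
    # to adapt to the actual model definition in `.model`.
    model = WPODNet()
    model.load_state_dict(torch.load('wpodnet.pth', map_location='cpu'))

    predictor = Predictor(model)
    prediction = predictor.predict(Image.open('car.jpg'))

    print(f'confidence: {prediction.confidence:.4f}')
    prediction.annotate().save('annotated.jpg')

    # `warp` returns an RGB ndarray; convert to BGR before cv2.imwrite
    cv2.imwrite('warped.jpg', cv2.cvtColor(prediction.warp(), cv2.COLOR_RGB2BGR))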