from typing import Tuple

import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_tensor

from .model import WPODNet

class Prediction:
    def __init__(self, image: Image.Image, bounds: np.ndarray, confidence: float):
        # bounds: (4, 2) corner array ordered top-left, top-right,
        # bottom-right, bottom-left, in image pixel coordinates
        self.image = image
        self.bounds = bounds
        self.confidence = confidence
    def _get_width_height(self) -> Tuple[int, int]:
        # Pick the target size for the rectified plate from the polygon's
        # aspect ratio: squarer polygons are treated as two-row plates (64x46),
        # wide polygons as single-row plates (100x23)
        def distance(point1, point2):
            return np.sqrt((point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2)
        box = self.bounds
        width = (distance(box[0], box[1]) + distance(box[2], box[3])) / 2
        height = (distance(box[1], box[2]) + distance(box[3], box[0])) / 2
        if height / width > 0.49:
            return 64, 46
        return 100, 23
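    # Worked example (hypothetical numbers): averaged side lengths of 120 px
    # (top/bottom) and 80 px (left/right) give height/width = 0.67 > 0.49, so
    # the plate is treated as two-row and warped to 64x46; a 200x40 polygon
    # gives 0.2 <= 0.49 and maps to the single-row size 100x23.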
    def get_perspective_M(self, width: int, height: int) -> np.ndarray:
        # Build the perspective matrix mapping the bounding polygon onto an
        # upright (width x height) rectangle
        src_points = np.array(self.bounds, dtype=np.float32)
        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
        return cv2.getPerspectiveTransform(src_points, dst_points)
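    # Note: cv2.getPerspectiveTransform solves for the 3x3 homography that
    # sends each of the four source corners to the matching destination
    # corner; warp() below applies it with cv2.warpPerspective.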
def annotate(self, outline: str = 'red', width: int = 3) -> Image.Image:
canvas = self.image.copy()
drawer = ImageDraw.Draw(canvas)
drawer.polygon(
[(x, y) for x, y in self.bounds],
outline=outline,
width=width
)
return canvas
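    # Note: the `width` keyword of ImageDraw.polygon requires a reasonably
    # recent Pillow release; on older versions, close the polygon manually and
    # draw it with drawer.line(corners + corners[:1], fill=outline, width=width).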
    def warp(self) -> np.ndarray:
        # Rectify the plate: warp the bounding polygon onto an upright rectangle
        width, height = self._get_width_height()
        M = self.get_perspective_M(width, height)
        n_image = np.array(self.image)
        return cv2.warpPerspective(n_image, M, (width, height))
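    # For example, a single-row plate (target size 100x23) comes back as a
    # (23, 100, 3) uint8 array in the same channel order as the source image.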
class Predictor:
    # Corners of the canonical unit square in homogeneous coordinates;
    # the predicted affine `theta` maps this square onto the plate's corners
    _q = np.array([
        [-.5, .5, .5, -.5],
        [-.5, -.5, .5, .5],
        [1., 1., 1., 1.]
    ])
    _scaling_const = 7.75  # side scaling constant: ((208 + 40) / 2) / 16 in the reference WPOD-NET implementation
    _stride = 16  # network downsampling factor: one anchor per 16x16 pixel cell
    def __init__(self, wpodnet: WPODNet):
        self.wpodnet = wpodnet
        self.wpodnet.eval()  # inference mode (affects dropout/batch-norm layers)
def _resize_to_fixed_ratio(self, image: Image.Image, dim_min: int, dim_max: int) -> Image.Image:
h, w = image.height, image.width
wh_ratio = max(h, w) / min(h, w)
side = int(wh_ratio * dim_min)
bound_dim = min(side + side % self._stride, dim_max)
factor = bound_dim / max(h, w)
reg_w, reg_h = int(w * factor), int(h * factor)
        # Ensure both width and height are multiples of `self._stride`
        reg_w_mod = reg_w % self._stride
        if reg_w_mod > 0:
            reg_w += self._stride - reg_w_mod
        reg_h_mod = reg_h % self._stride
        if reg_h_mod > 0:
            reg_h += self._stride - reg_h_mod
return image.resize((reg_w, reg_h))
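    # Worked example: a 1000x500 image has wh_ratio = 2.0, so with
    # dim_min = 288 the bound is side = int(2.0 * 288) = 576 (already a
    # multiple of 16), factor = 576 / 1000 = 0.576, and the image is resized
    # to 576x288 -- both sides multiples of the stride, i.e. a 36x18 anchor grid.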
    def _to_torch_image(self, image: Image.Image) -> torch.Tensor:
        # (C, H, W) -> (1, C, H, W): add the batch dimension
        tensor = to_tensor(image)
        return tensor.unsqueeze(0)
def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
with torch.no_grad():
probs, affines = self.wpodnet.forward(image)
        # Convert to squeezed numpy arrays; channel 0 of the probability map
        # is the per-anchor plate probability used below
        # grid_h: the number of anchor rows
        # grid_w: the number of anchor columns
        probs = np.squeeze(probs.cpu().numpy())[0]  # (grid_h, grid_w)
        affines = np.squeeze(affines.cpu().numpy())  # (6, grid_h, grid_w)
return probs, affines
    def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
        # Convert the flat argmax back to (anchor_y, anchor_x) grid coordinates
        return np.unravel_index(probs.argmax(), probs.shape)
def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
# Compute theta
theta = affines[:, anchor_y, anchor_x]
theta = theta.reshape((2, 3))
        # Clamp the diagonal scales to be non-negative, as in the WPOD-NET paper
        theta[0, 0] = max(theta[0, 0], 0.0)
        theta[1, 1] = max(theta[1, 1], 0.0)
# Convert theta into the bounding polygon
bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio
        # Normalize the corners to relative [0, 1] image coordinates
_, grid_h, grid_w = affines.shape
bounds[0] = (bounds[0] + anchor_x + .5) / grid_w
bounds[1] = (bounds[1] + anchor_y + .5) / grid_h
return np.transpose(bounds)
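    # Worked example (hypothetical values): on a 20x30 grid, the identity
    # theta = [[1, 0, 0], [0, 1, 0]] maps _q back to the unit square, so the
    # corners sit 0.5 * 7.75 * scaling_ratio anchor cells away from the cell
    # centre at ((anchor_x + .5) / 30, (anchor_y + .5) / 20), in relative terms.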
def predict(self, image: Image.Image, scaling_ratio: float = 1.0, dim_min: int = 288, dim_max: int = 608) -> Prediction:
orig_h, orig_w = image.height, image.width
        # Resize the image so both sides are multiples of the stride;
        # this keeps the anchor grid aligned with the feature map
resized = self._resize_to_fixed_ratio(image, dim_min=dim_min, dim_max=dim_max)
resized = self._to_torch_image(resized)
resized = resized.to(self.wpodnet.device)
        # Inference with WPODNet
        # probs: the per-anchor probability that a license plate is present
        # affines: the predicted affine parameters per anchor
probs, affines = self._inference(resized)
        # Decode the bounds at the anchor with the highest probability
max_prob = np.amax(probs)
anchor_y, anchor_x = self._get_max_anchor(probs)
bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)
bounds[:, 0] *= orig_w
bounds[:, 1] *= orig_h
return Prediction(
image=image,
bounds=bounds.astype(np.int32),
confidence=max_prob.item()
)
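
if __name__ == '__main__':
    # Minimal usage sketch. Assumptions, not verified against this repo:
    # WPODNet() constructs with default arguments, and 'wpodnet.pth' /
    # 'car.jpg' are placeholder paths -- substitute your own checkpoint
    # and image.
    model = WPODNet()
    model.load_state_dict(torch.load('wpodnet.pth', map_location='cpu'))
    predictor = Predictor(model)

    prediction = predictor.predict(Image.open('car.jpg'))
    print(f'plate confidence: {prediction.confidence:.4f}')
    prediction.annotate().save('annotated.jpg')

    # warp() returns an RGB array; convert to BGR before writing with OpenCV
    cv2.imwrite('plate.jpg', cv2.cvtColor(prediction.warp(), cv2.COLOR_RGB2BGR))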