from typing import Tuple

import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_tensor

from .model import WPODNet


class Prediction:
    def __init__(self, image: Image.Image, bounds: np.ndarray, confidence: float):
        self.image = image
        self.bounds = bounds
        self.confidence = confidence

    def _get_width_height(self) -> Tuple[int, int]:
        def distance(point1, point2) -> float:
            x1, y1 = point1
            x2, y2 = point2
            return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)

        # Average the lengths of opposite edges to estimate the
        # quadrilateral's width and height.
        box = self.bounds
        dis1 = distance(box[0], box[1])
        dis2 = distance(box[1], box[2])
        dis3 = distance(box[2], box[3])
        dis4 = distance(box[3], box[0])
        width = (dis1 + dis3) / 2
        height = (dis2 + dis4) / 2

        # A tall aspect ratio suggests a two-row plate; otherwise assume a
        # single-row plate.
        if height / width > 0.49:
            return 64, 46
        return 100, 23
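
    # Worked example (sketch): a detected quadrilateral about 200 px wide and
    # 110 px tall has height/width = 0.55 > 0.49, so it is treated as a
    # two-row plate and rectified to 64x46; a 300x60 quadrilateral
    # (ratio 0.2) maps to 100x23.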

    def get_perspective_M(self, width: int, height: int) -> np.ndarray:
        # Map the detected quadrilateral onto an axis-aligned
        # width x height rectangle.
        src_points = np.array(self.bounds, dtype=np.float32)
        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
        return cv2.getPerspectiveTransform(src_points, dst_points)

    def annotate(self, outline: str = 'red', width: int = 3) -> Image.Image:
        canvas = self.image.copy()
        drawer = ImageDraw.Draw(canvas)
        drawer.polygon(
            [(x, y) for x, y in self.bounds],
            outline=outline,
            width=width
        )
        return canvas

    def warp(self) -> np.ndarray:
        # Rectify the plate with a perspective warp so the crop is frontal
        # and axis-aligned.
        width, height = self._get_width_height()
        M = self.get_perspective_M(width, height)
        n_image = np.array(self.image)
        warped = cv2.warpPerspective(n_image, M, (int(width), int(height)))
        return warped
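
    # Example (sketch): for a single-row plate, warp() returns an RGB array
    # of shape (23, 100, 3) that can be saved directly:
    #
    #     plate = Image.fromarray(prediction.warp())
    #     plate.save('plate.jpg')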


class Predictor:
    # Canonical unit square (corners at +/-0.5) in homogeneous coordinates;
    # each column is one corner.
    _q = np.array([
        [-.5, .5, .5, -.5],
        [-.5, -.5, .5, .5],
        [1., 1., 1., 1.]
    ])
    _scaling_const = 7.75
    _stride = 16

    def __init__(self, wpodnet: WPODNet):
        self.wpodnet = wpodnet
        # Inference only: put the network in evaluation mode.
        self.wpodnet.eval()

    def _resize_to_fixed_ratio(self, image: Image.Image, dim_min: int, dim_max: int) -> Image.Image:
        h, w = image.height, image.width

        wh_ratio = max(h, w) / min(h, w)
        side = int(wh_ratio * dim_min)
        bound_dim = min(side + side % self._stride, dim_max)

        factor = bound_dim / max(h, w)
        reg_w, reg_h = int(w * factor), int(h * factor)

        # Round both dimensions up to the nearest multiple of the network
        # stride so the output feature grid aligns with the input.
        reg_w_mod = reg_w % self._stride
        if reg_w_mod > 0:
            reg_w += self._stride - reg_w_mod

        reg_h_mod = reg_h % self._stride
        if reg_h_mod > 0:
            reg_h += self._stride - reg_h_mod

        return image.resize((reg_w, reg_h))

    def _to_torch_image(self, image: Image.Image) -> torch.Tensor:
        # CHW float tensor in [0, 1], plus a batch dimension.
        tensor = to_tensor(image)
        return tensor.unsqueeze_(0)

    def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
        with torch.no_grad():
            probs, affines = self.wpodnet.forward(image)

        # Squeeze the batch dimension and keep the first probability channel:
        #   probs:   (grid_h, grid_w)
        #   affines: (6, grid_h, grid_w)
        probs = np.squeeze(probs.cpu().numpy())[0]
        affines = np.squeeze(affines.cpu().numpy())

        return probs, affines

    def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
        # (y, x) index of the most confident anchor.
        return np.unravel_index(probs.argmax(), probs.shape)

    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
        # Build the 2x3 affine matrix of the chosen anchor, clamping the
        # diagonal so the transform cannot mirror the canonical square.
        theta = affines[:, anchor_y, anchor_x]
        theta = theta.reshape((2, 3))
        theta[0, 0] = max(theta[0, 0], 0.0)
        theta[1, 1] = max(theta[1, 1], 0.0)

        # Project the canonical square through the affine transform.
        bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio

        # Shift from anchor-relative coordinates to [0, 1] of the grid.
        _, grid_h, grid_w = affines.shape
        bounds[0] = (bounds[0] + anchor_x + .5) / grid_w
        bounds[1] = (bounds[1] + anchor_y + .5) / grid_h

        # (4, 2): one (x, y) corner per row.
        return np.transpose(bounds)
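
    # Decode step, as implemented above: for the anchor at (x, y) with affine
    # parameters theta, the corners are
    #     p = theta @ _q * _scaling_const * scaling_ratio
    # then shifted by (x + .5, y + .5) and divided by the grid size, giving
    # corner coordinates normalized to [0, 1].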

    def predict(self, image: Image.Image, scaling_ratio: float = 1.0, dim_min: int = 288, dim_max: int = 608) -> Prediction:
        orig_h, orig_w = image.height, image.width

        # Resize to a stride-aligned resolution and run the network.
        resized = self._resize_to_fixed_ratio(image, dim_min=dim_min, dim_max=dim_max)
        resized = self._to_torch_image(resized)
        resized = resized.to(self.wpodnet.device)

        probs, affines = self._inference(resized)

        # Decode the quadrilateral at the most confident anchor.
        max_prob = np.amax(probs)
        anchor_y, anchor_x = self._get_max_anchor(probs)
        bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)

        # Scale the normalized bounds back to the original image size.
        bounds[:, 0] *= orig_w
        bounds[:, 1] *= orig_h

        return Prediction(
            image=image,
            bounds=bounds.astype(np.int32),
            confidence=max_prob.item()
        )
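

# Minimal usage sketch (assumptions: WPODNet() takes no constructor
# arguments, exposes the .device attribute relied on above, and loads its
# weights from a plain state dict; the file names are placeholders):
#
#     import torch
#     from PIL import Image
#
#     model = WPODNet()
#     model.load_state_dict(torch.load('wpodnet.pth', map_location='cpu'))
#
#     predictor = Predictor(model)
#     prediction = predictor.predict(Image.open('car.jpg'))
#     print(f'confidence: {prediction.confidence:.4f}')
#     prediction.annotate().save('annotated.jpg')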