from typing import Tuple

import cv2
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_tensor

from .model import WPODNet

class Prediction:
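    """A single WPOD-Net detection: the source image, the four-corner bounding
    polygon in original image coordinates, and the detection confidence."""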
    def __init__(self, image: Image.Image, bounds: np.ndarray, confidence: float):
        self.image = image
        self.bounds = bounds
        self.confidence = confidence
    def _get_width_height(self) -> Tuple[int, int]:
        # Estimate the plate's width and height by averaging the two pairs of
        # opposite edge lengths, then pick the warp target size from the
        # aspect ratio (taller plates get the squarer 64x46 target).
        def distance(point1, point2):
            return np.sqrt((point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2)

        box = self.bounds
        dis1 = distance(box[0], box[1])
        dis2 = distance(box[1], box[2])
        dis3 = distance(box[2], box[3])
        dis4 = distance(box[3], box[0])
        width = (dis1 + dis3) / 2
        height = (dis2 + dis4) / 2

        if height / width > 0.49:
            return 64, 46
        return 100, 23
    def get_perspective_M(self, width: int, height: int) -> np.ndarray:
        # Get the 3x3 perspective matrix mapping the detected quadrilateral
        # onto an axis-aligned (width, height) rectangle
        src_points = np.array(self.bounds, dtype=np.float32)
        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)
        return cv2.getPerspectiveTransform(src_points, dst_points)
    def annotate(self, outline: str = 'red', width: int = 3) -> Image.Image:
        # Draw the bounding polygon on a copy of the original image
        canvas = self.image.copy()
        drawer = ImageDraw.Draw(canvas)
        drawer.polygon(
            [(x, y) for x, y in self.bounds],
            outline=outline,
            width=width
        )
        return canvas
    def warp(self) -> np.ndarray:
        # Rectify the plate: choose a target size from the estimated aspect
        # ratio, then warp the bounding quadrilateral onto that rectangle
        width, height = self._get_width_height()
        M = self.get_perspective_M(width, height)
        n_image = np.array(self.image)
        warped = cv2.warpPerspective(n_image, M, (int(width), int(height)))
        return warped


class Predictor:
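    """Runs WPOD-Net over a PIL image and decodes the most confident
    anchor into a `Prediction`."""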
    # Canonical unit-square corners in homogeneous coordinates; the predicted
    # affine parameters map these onto the plate's bounding polygon.
    _q = np.array([
        [-.5, .5, .5, -.5],
        [-.5, -.5, .5, .5],
        [1., 1., 1., 1.]
    ])
    # Scale applied to the decoded polygon before normalization
    _scaling_const = 7.75
    # The network downsamples its input by a factor of 16
    _stride = 16
    def __init__(self, wpodnet: WPODNet):
        self.wpodnet = wpodnet
        self.wpodnet.eval()
    def _resize_to_fixed_ratio(self, image: Image.Image, dim_min: int, dim_max: int) -> Image.Image:
        h, w = image.height, image.width

        wh_ratio = max(h, w) / min(h, w)
        side = int(wh_ratio * dim_min)
        bound_dim = min(side + side % self._stride, dim_max)

        factor = bound_dim / max(h, w)
        reg_w, reg_h = int(w * factor), int(h * factor)

        # Ensure both width and height are multiples of `self._stride`
        reg_w_mod = reg_w % self._stride
        if reg_w_mod > 0:
            reg_w += self._stride - reg_w_mod

        reg_h_mod = reg_h % self._stride
        if reg_h_mod > 0:
            reg_h += self._stride - reg_h_mod

        return image.resize((reg_w, reg_h))
    def _to_torch_image(self, image: Image.Image) -> torch.Tensor:
        tensor = to_tensor(image)
        return tensor.unsqueeze_(0)  # add the batch dimension
    def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
        with torch.no_grad():
            probs, affines = self.wpodnet.forward(image)

        # Convert to squeezed numpy arrays
        # grid_w: the number of anchors per row
        # grid_h: the number of anchors per column
        probs = np.squeeze(probs.cpu().numpy())[0]   # (grid_h, grid_w)
        affines = np.squeeze(affines.cpu().numpy())  # (6, grid_h, grid_w)

        return probs, affines
    def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
        return np.unravel_index(probs.argmax(), probs.shape)
    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
        # Compute theta, clamping the diagonal (scale) terms to be non-negative
        theta = affines[:, anchor_y, anchor_x]
        theta = theta.reshape((2, 3))
        theta[0, 0] = max(theta[0, 0], 0.0)
        theta[1, 1] = max(theta[1, 1], 0.0)

        # Convert theta into the bounding polygon
        bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio

        # Normalize the bounds to [0, 1] relative to the feature-map grid
        _, grid_h, grid_w = affines.shape
        bounds[0] = (bounds[0] + anchor_x + .5) / grid_w
        bounds[1] = (bounds[1] + anchor_y + .5) / grid_h

        return np.transpose(bounds)
    def predict(self, image: Image.Image, scaling_ratio: float = 1.0, dim_min: int = 288, dim_max: int = 608) -> Prediction:
        orig_h, orig_w = image.height, image.width

        # Resize the image to a fixed ratio; this makes setting up the
        # anchors convenient
        resized = self._resize_to_fixed_ratio(image, dim_min=dim_min, dim_max=dim_max)
        resized = self._to_torch_image(resized)
        resized = resized.to(self.wpodnet.device)

        # Inference with WPODNet
        # probs: the probability distribution over license plate locations
        # affines: the predicted affine matrices
        probs, affines = self._inference(resized)

        # Get the theta with the maximum probability
        max_prob = np.amax(probs)
        anchor_y, anchor_x = self._get_max_anchor(probs)
        bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)

        # Scale the normalized bounds back to original image coordinates
        bounds[:, 0] *= orig_w
        bounds[:, 1] *= orig_h

        return Prediction(
            image=image,
            bounds=bounds.astype(np.int32),
            confidence=max_prob.item()
        )
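

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module. It assumes the
    # module is run from its package (e.g. `python -m ...`) so the relative
    # import of `WPODNet` resolves; the `WPODNet()` construction, the
    # checkpoint path `wpodnet.pth`, and the input `car.jpg` are placeholders
    # to adapt to the actual model definition in `.model`.
    model = WPODNet()
    model.load_state_dict(torch.load('wpodnet.pth', map_location='cpu'))

    predictor = Predictor(model)
    prediction = predictor.predict(Image.open('car.jpg'))

    print(f'confidence: {prediction.confidence:.4f}')
    prediction.annotate().save('annotated.jpg')

    # `warp` returns an RGB ndarray; convert to BGR before cv2.imwrite
    cv2.imwrite('warped.jpg', cv2.cvtColor(prediction.warp(), cv2.COLOR_RGB2BGR))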