NguyenPhong2612 committed
Commit: ab576ba · Parent(s): 4185961
first commit
Browse files
- .gitignore +8 -0
- Test.ipynb +80 -0
- app.py +45 -0
- parseq/augmentation.py +127 -0
- parseq/config.yaml +25 -0
- parseq/module.py +140 -0
- parseq/system.py +311 -0
- parseq/utils.py +113 -0
- requirements.txt +8 -0
- wpodnet/__init__.py +7 -0
- wpodnet/backend.py +157 -0
- wpodnet/lib_detection.py +265 -0
- wpodnet/model.py +73 -0
- wpodnet/stream.py +36 -0
.gitignore
ADDED
@@ -0,0 +1,8 @@
+/Test.ipynb/
+/Test image/
+/parseq/__pycache__/
+/wpodnet/__pycache__/
+/wpodnet/__init__/
+/__init__/
+/flagged/
+/weights/
Test.ipynb
ADDED
@@ -0,0 +1,80 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch \n",
+    "import torch.nn as nn\n",
+    "from parseq.system import System\n",
+    "import yaml\n",
+    "import cv2\n",
+    "from parseq.augmentation import trans\n",
+    "import PIL\n",
+    "import imgaug\n",
+    "import torchvision\n",
+    "from wpodnet.lib_detection import load_model_wpod, detect_lp\n",
+    "import numpy as np\n",
+    "import gradio as gr \n",
+    "import tensorflow as tf\n",
+    "from tensorflow import keras\n",
+    "import timm\n",
+    "import pytorch_lightning as pl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensorflow==2.13.1\n",
+      "torch==2.4.1+cu118\n",
+      "gradio==4.44.1\n",
+      "timm==1.0.9\n",
+      "PIL==10.2.0\n",
+      "imgaug==0.4.0\n",
+      "opencv-python==4.10.0\n",
+      "torchvision==0.19.1+cu118\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f'tensorflow=={tf.__version__}')\n",
+    "print(f'torch=={torch.__version__}')\n",
+    "print(f'gradio=={gr.__version__}')\n",
+    "print(f'timm=={timm.__version__}')\n",
+    "print(f'PIL=={PIL.__version__}')\n",
+    "print(f'imgaug=={imgaug.__version__}')\n",
+    "print(f'opencv-python=={cv2.__version__}')\n",
+    "print(f'torchvision=={torchvision.__version__}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "virtual",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
app.py
ADDED
@@ -0,0 +1,45 @@
+import torch
+import torch.nn as nn
+from parseq.system import System
+import yaml
+import cv2
+from parseq.augmentation import trans
+from PIL import Image
+from wpodnet.lib_detection import load_model_wpod, detect_lp
+import numpy as np
+import gradio as gr
+
+
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+checkpoint_path = 'weights/parseq.ckpt'
+config_path = 'parseq/config.yaml'
+wpod_path = 'weights/wpod-net.h5'
+wpod_net = load_model_wpod(wpod_path)
+
+with open(config_path, 'r') as data:
+    config = yaml.safe_load(data)
+system = System(config)
+checkpoint = torch.load(checkpoint_path, map_location=device)
+system.load_state_dict(checkpoint['state_dict'])
+system.to(device)
+system.eval()
+
+def predict(image):
+    if isinstance(image, str):
+        image = cv2.imread(image)
+    _, img_warped, _, _ = detect_lp(wpod_net, image, 0.5)
+    img = (img_warped[0] * 255).astype(np.uint8)
+    img = Image.fromarray(img).convert("RGB")
+    image = trans(img).unsqueeze(0)
+    with torch.no_grad():
+        pred = system(image).softmax(-1)
+    generated_text, _ = system.tokenizer.decode(pred)
+    return generated_text[0]
+
+interface = gr.Interface(
+    fn=predict,
+    inputs=[gr.components.Image()],
+    outputs=[gr.components.Textbox(label="License plate", lines=2)])
+interface.launch(share=True, debug=True)
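With weights/parseq.ckpt and weights/wpod-net.h5 in place, running python app.py from the repository root loads both models and launches the Gradio interface with a public share link (share=True, debug=True). Note that the weights/ folder is listed in .gitignore, so the checkpoints are not part of this commit and have to be provided separately.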
parseq/augmentation.py
ADDED
@@ -0,0 +1,127 @@
+from PIL import Image, ImageFilter
+from timm.data.auto_augment import _LEVEL_DENOM, LEVEL_TO_ARG, NAME_TO_OP, _randomly_negate, rotate
+from functools import partial
+from timm.data import auto_augment
+import imgaug.augmenters as iaa
+from torchvision import transforms as T
+import numpy as np
+
+image_size = [224, 224]
+
+def rotate_expand(img, degrees, **kwargs):
+    kwargs['expand'] = True
+    return rotate(img, degrees, **kwargs)
+
+
+def _level_to_arg(level, hparams, key, default):
+    magnitude = hparams.get(key, default)
+    level = (level / _LEVEL_DENOM) * magnitude
+    level = _randomly_negate(level)
+    return (level,)
+
+
+def apply():
+    NAME_TO_OP.update({
+        'Rotate': rotate_expand,
+    })
+    LEVEL_TO_ARG.update({
+        'Rotate': partial(_level_to_arg, key='rotate_deg', default=30.0),
+        'ShearX': partial(_level_to_arg, key='shear_x_pct', default=0.3),
+        'ShearY': partial(_level_to_arg, key='shear_y_pct', default=0.3),
+        'TranslateXRel': partial(_level_to_arg, key='translate_x_pct', default=0.45),
+        'TranslateYRel': partial(_level_to_arg, key='translate_y_pct', default=0.45),
+    })
+
+apply()
+
+_OP_CACHE = {}
+
+def _get_op(key, factory):
+    try:
+        op = _OP_CACHE[key]
+    except KeyError:
+        op = factory()
+        _OP_CACHE[key] = op
+    return op
+
+
+def _get_param(level, img, max_dim_factor, min_level=1):
+    max_level = max(min_level, max_dim_factor * max(img.size))
+    return round(min(level, max_level))
+
+
+def gaussian_blur(img, radius, **__):
+    radius = _get_param(radius, img, 0.02)
+    key = 'gaussian_blur_' + str(radius)
+    op = _get_op(key, lambda: ImageFilter.GaussianBlur(radius))
+    return img.filter(op)
+
+
+def motion_blur(img, k, **__):
+    k = _get_param(k, img, 0.08, 3) | 1  # bin to odd values
+    key = 'motion_blur_' + str(k)
+    op = _get_op(key, lambda: iaa.MotionBlur(k))
+    return Image.fromarray(op(image=np.asarray(img)))
+
+
+def gaussian_noise(img, scale, **_):
+    scale = _get_param(scale, img, 0.25) | 1  # bin to odd values
+    key = 'gaussian_noise_' + str(scale)
+    op = _get_op(key, lambda: iaa.AdditiveGaussianNoise(scale=scale))
+    return Image.fromarray(op(image=np.asarray(img)))
+
+
+def poisson_noise(img, lam, **_):
+    lam = _get_param(lam, img, 0.2) | 1  # bin to odd values
+    key = 'poisson_noise_' + str(lam)
+    op = _get_op(key, lambda: iaa.AdditivePoissonNoise(lam))
+    return Image.fromarray(op(image=np.asarray(img)))
+
+
+def _level_to_arg(level, _hparams, max):
+    level = max * level / auto_augment._LEVEL_DENOM
+    return (level,)
+
+
+_RAND_TRANSFORMS = auto_augment._RAND_INCREASING_TRANSFORMS.copy()
+_RAND_TRANSFORMS.remove('SharpnessIncreasing')  # remove, interferes with *blur ops
+_RAND_TRANSFORMS.extend([
+    'GaussianBlur',
+    'PoissonNoise',
+])
+auto_augment.LEVEL_TO_ARG.update({
+    'GaussianBlur': partial(_level_to_arg, max=4),
+    'MotionBlur': partial(_level_to_arg, max=20),
+    'GaussianNoise': partial(_level_to_arg, max=0.1 * 255),
+    'PoissonNoise': partial(_level_to_arg, max=40),
+})
+auto_augment.NAME_TO_OP.update({
+    'GaussianBlur': gaussian_blur,
+    'MotionBlur': motion_blur,
+    'GaussianNoise': gaussian_noise,
+    'PoissonNoise': poisson_noise,
+})
+
+
+def rand_augment_transform(magnitude=5, num_layers=3):
+    hparams = {
+        'rotate_deg': 30,
+        'shear_x_pct': 0.9,
+        'shear_y_pct': 0.2,
+        'translate_x_pct': 0.10,
+        'translate_y_pct': 0.30,
+    }
+    ra_ops = auto_augment.rand_augment_ops(magnitude, hparams=hparams, transforms=_RAND_TRANSFORMS)
+    choice_weights = [1.0 / len(ra_ops) for _ in range(len(ra_ops))]
+    return auto_augment.RandAugment(ra_ops, num_layers, choice_weights)
+
+
+
+trans = [rand_augment_transform()]
+trans.append(lambda img: img.rotate(0, expand=True))
+trans.extend([
+    T.Resize(image_size, T.InterpolationMode.BICUBIC),
+    T.ToTensor(),
+    T.Normalize(0.5, 0.5),
+])
+trans = T.Compose(trans)
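A minimal sketch of exercising the composed pipeline on a single image, assuming the packages from requirements.txt are installed and the repository root is on PYTHONPATH; the sample file name is hypothetical:

from PIL import Image
from parseq.augmentation import trans

img = Image.open('sample_plate.jpg').convert('RGB')   # hypothetical plate crop
x = trans(img)             # tensor of shape (3, 224, 224), normalized to [-1, 1]
batch = x.unsqueeze(0)     # the model expects a batch dimension, as in app.py
print(batch.shape)         # torch.Size([1, 3, 224, 224])

Because trans starts with rand_augment_transform(), each call applies a fresh random augmentation before the deterministic resize, tensor conversion, and normalization.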
parseq/config.yaml
ADDED
@@ -0,0 +1,25 @@
+model:
+  image_size : [224, 224]
+  patch_size : [16, 16]
+  max_len : 25
+  d_model : 384
+  enc_num_heads : 6
+  enc_mlp_ratio : 4
+  enc_depth : 12
+  dec_num_heads : 12
+  dec_mlp_ratio : 4
+  dec_depth : 1
+  perm_num : 8
+  perm_forward : true
+  perm_mirrored : true
+  decode_ar : true
+  refine_iter : 2
+  num_tokens : 97
+  pretrained : false
+  train_charset : 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~
+  weight_url : https://github.com/baudm/parseq/releases/download/v1.0.0/parseq_small_patch16_224-fcf06f5a.pt
+trainer:
+  lr : 3e-4
+  batch_size : 4
+  weight_decay : 0.0
+  warm_pct : 0.075
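One detail worth knowing when reading this file programmatically: PyYAML's YAML 1.1 resolver does not treat 3e-4 (no decimal point) as a float, so trainer.lr comes back as a string, which is why parseq/system.py wraps the trainer values in float(). A small sketch:

import yaml

with open('parseq/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

print(config['model']['d_model'])      # 384
print(repr(config['trainer']['lr']))   # '3e-4', a string, hence float(...) in System.__init__
print(config['model']['perm_num'])     # 8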
parseq/module.py
ADDED
@@ -0,0 +1,140 @@
+from timm.models.vision_transformer import PatchEmbed, VisionTransformer
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+
+class Encoder(VisionTransformer):
+
+    def __init__(
+        self,
+        image_size=224,
+        patch_size=16,
+        in_chans=3,
+        embed_dim=768,
+        depth=12,
+        num_heads=12,
+        mlp_ratio=4.0,
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        embed_layer=PatchEmbed):
+        super().__init__(
+            image_size,
+            patch_size,
+            in_chans,
+            embed_dim=embed_dim,
+            depth=depth,
+            num_heads=num_heads,
+            mlp_ratio=mlp_ratio,
+            qkv_bias=qkv_bias,
+            drop_rate=drop_rate,
+            attn_drop_rate=attn_drop_rate,
+            drop_path_rate=drop_path_rate,
+            embed_layer=embed_layer,
+            num_classes=0,
+            global_pool='',
+            class_token=False)
+
+    def forward(self, x):
+        return self.forward_features(x.to(device))
+
+class DecoderLayer(nn.Module):
+
+    def __init__(self, config):
+        super().__init__()
+        self.d_model = config['model']['d_model']
+        self.dec_num_heads = config['model']['dec_num_heads']
+        self.d_ff = config['model']['dec_mlp_ratio'] * self.d_model
+        self.eps = 1e-5
+        self.self_attn = nn.MultiheadAttention(self.d_model, self.dec_num_heads, dropout=0.1, batch_first=True)
+        self.cross_attn = nn.MultiheadAttention(self.d_model, self.dec_num_heads, dropout=0.1, batch_first=True)
+
+        self.linear1 = nn.Linear(self.d_model, self.d_ff)
+        self.dropout = nn.Dropout(p=0.1)
+        self.linear2 = nn.Linear(self.d_ff, self.d_model)
+
+        self.norm1 = nn.LayerNorm(self.d_model, eps=self.eps)
+        self.norm2 = nn.LayerNorm(self.d_model, eps=self.eps)
+        self.norm_q = nn.LayerNorm(self.d_model, eps=self.eps)
+        self.norm_c = nn.LayerNorm(self.d_model, eps=self.eps)
+        self.dropout1 = nn.Dropout(p=0.1)
+        self.dropout2 = nn.Dropout(p=0.1)
+        self.dropout3 = nn.Dropout(p=0.1)
+
+
+    def forward_stream(
+        self,
+        tgt,
+        tgt_norm,
+        tgt_kv,
+        memory,
+        tgt_mask,
+        tgt_key_padding_mask):
+
+        tgt2, sa_weights = self.self_attn(
+            tgt_norm, tgt_kv, tgt_kv, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+        )
+        tgt = tgt + self.dropout1(tgt2)
+
+        tgt2, ca_weights = self.cross_attn(self.norm1(tgt), memory, memory)
+        tgt = tgt + self.dropout2(tgt2)
+
+        tgt2 = self.linear2(self.dropout(F.gelu(self.linear1(self.norm2(tgt)))))
+        tgt = tgt + self.dropout3(tgt2)
+        return tgt, sa_weights, ca_weights
+
+    def forward(
+        self,
+        query,
+        content,
+        memory,
+        query_mask=None,
+        content_mask=None,
+        content_key_padding_mask=None,
+        update_content: bool = True):
+
+        query_norm = self.norm_q(query)
+        content_norm = self.norm_c(content)
+        query = self.forward_stream(query, query_norm, content_norm, memory, query_mask, content_key_padding_mask)[0]
+        if update_content:
+            content = self.forward_stream(
+                content, content_norm, content_norm, memory, content_mask, content_key_padding_mask
+            )[0]
+        return query, content
+
+
+
+class Decoder(nn.Module):
+    __constants__ = ['norm']
+
+    def __init__(self, config):
+        super().__init__()
+        self.d_model = config['model']['d_model']
+        self.num_layers = config['model']['dec_depth']
+        self.layers = nn.ModuleList([DecoderLayer(config) for _ in range(self.num_layers)])
+        self.norm = nn.LayerNorm(self.d_model)
+
+    def forward(self, query, content, memory, query_mask=None, content_mask=None, content_key_padding_mask=None):
+        for i, mod in enumerate(self.layers):
+            last = i == len(self.layers) - 1
+            query, content = mod(
+                query, content, memory, query_mask, content_mask, content_key_padding_mask, update_content=not last)
+        query = self.norm(query)
+        return query
+
+
+class TokenEmbedding(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.num_tokens = config['model']['num_tokens']
+        self.d_model = config['model']['d_model']
+        self.embedding = nn.Embedding(self.num_tokens, self.d_model)
+
+    def forward(self, tokens):
+        return math.sqrt(self.d_model) * self.embedding(tokens)
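A shape-check sketch for the decoder pieces, using the values from parseq/config.yaml; the zero tensors are stand-ins for the positional queries and the ViT memory (224/16 = 14 patches per side, so 196 memory tokens):

import torch
import yaml
from parseq.module import Decoder, TokenEmbedding

with open('parseq/config.yaml', 'r') as f:
    config = yaml.safe_load(f)

embed = TokenEmbedding(config)    # 97 tokens -> 384-d embeddings scaled by sqrt(d_model)
decoder = Decoder(config)         # dec_depth = 1 layer of self- plus cross-attention

tokens = torch.randint(0, config['model']['num_tokens'], (2, 10))
content = embed(tokens)                        # (2, 10, 384)
query = torch.zeros(2, 10, 384)                # stand-in for the positional queries
memory = torch.zeros(2, 196, 384)              # stand-in for the encoder output
print(decoder(query, content, memory).shape)   # torch.Size([2, 10, 384])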
parseq/system.py
ADDED
@@ -0,0 +1,311 @@
+import torch
+import torch.nn as nn
+from timm.models.helpers import named_apply
+from functools import partial
+from .module import Encoder, Decoder, TokenEmbedding
+from .utils import init_weights
+import pytorch_lightning as pl
+from .utils import Tokenizer, CharsetAdapter
+import numpy as np
+import math
+from torch.optim import Optimizer
+from timm.optim import create_optimizer_v2
+from torch.optim.lr_scheduler import OneCycleLR
+from itertools import permutations
+import torch.nn.functional as F
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+class PARSeq(nn.Module):
+
+    def __init__(self, config, device=device):
+        super().__init__()
+
+        self.max_len = config['model']['max_len']
+        self.decode_ar = config['model']['decode_ar']
+        self.refine_iters = config['model']['refine_iter']
+        self.embed_dim = config['model']['d_model']
+        self.num_tokens = config['model']['num_tokens']
+        self.dropout = 0.1
+        self.encoder = Encoder(config['model']['image_size'], config['model']['patch_size'], embed_dim=config['model']['d_model'], depth=config['model']['enc_depth'], num_heads=config['model']['enc_num_heads'], mlp_ratio=config['model']['enc_mlp_ratio'])
+        self.decoder = Decoder(config)
+        self.text_embed = TokenEmbedding(config)
+        self.head = nn.Linear(self.embed_dim, self.num_tokens - 2)
+
+        self.pos_queries = nn.Parameter(torch.Tensor(1, self.max_len + 1, self.embed_dim))
+        self.dropout = nn.Dropout(self.dropout)
+        named_apply(partial(init_weights, exclude=['encoder']), self)
+        nn.init.trunc_normal_(self.pos_queries, std=0.02)
+        self._device = device
+
+    @torch.jit.ignore
+    def no_weight_decay(self):
+        param_names = {'text_embed.embedding.weight', 'pos_queries'}
+        enc_param_names = {'encoder.' + n for n in self.encoder.no_weight_decay()}
+        return param_names.union(enc_param_names)
+
+    def encode(self, img: torch.Tensor):
+        return self.encoder(img.to(self._device))
+
+    def decode(
+        self,
+        tgt: torch.Tensor,
+        memory: torch.Tensor,
+        tgt_mask=None,
+        tgt_padding_mask=None,
+        tgt_query=None,
+        tgt_query_mask=None):
+        N, L = tgt.shape
+        null_ctx = self.text_embed(tgt[:, :1])
+        tgt_emb = self.pos_queries[:, : L - 1] + self.text_embed(tgt[:, 1:])
+        tgt_emb = self.dropout(torch.cat([null_ctx, tgt_emb], dim=1))
+        if tgt_query is None:
+            tgt_query = self.pos_queries[:, :L].expand(N, -1, -1)
+        tgt_query = self.dropout(tgt_query)
+        return self.decoder(tgt_query, tgt_emb, memory, tgt_query_mask, tgt_mask, tgt_padding_mask)
+
+    def forward(self, tokenizer: Tokenizer, images, max_length):
+        testing = max_length is None
+        max_length = self.max_len if max_length is None else min(max_length, self.max_len)
+        bs = images.shape[0]
+        num_steps = max_length + 1
+        memory = self.encode(images).to(device)
+        pos_queries = self.pos_queries[:, :num_steps].expand(bs, -1, -1)
+
+        tgt_mask = query_mask = torch.triu(torch.ones((num_steps, num_steps), dtype=torch.bool, device=self._device), 1)
+
+        if self.decode_ar:
+            tgt_in = torch.full((bs, num_steps), tokenizer.pad_id, dtype=torch.long, device=self._device)
+            tgt_in[:, 0] = tokenizer.sos_id
+
+            logits = []
+            for i in range(num_steps):
+                j = i + 1
+                tgt_out = self.decode(
+                    tgt_in[:, :j],
+                    memory,
+                    tgt_mask[:j, :j],
+                    tgt_query=pos_queries[:, i:j],
+                    tgt_query_mask=query_mask[i:j, :j],)
+
+                p_i = self.head(tgt_out)
+                logits.append(p_i)
+                if j < num_steps:
+                    tgt_in[:, j] = p_i.squeeze().argmax(-1)
+                    if testing and (tgt_in == tokenizer.eos_id).any(dim=-1).all():
+                        break
+
+            logits = torch.cat(logits, dim=1)
+        else:
+            tgt_in = torch.full((bs, 1), tokenizer.sos_id, dtype=torch.long, device=self._device)
+            tgt_out = self.decode(tgt_in, memory, tgt_query=pos_queries)
+            logits = self.head(tgt_out)
+
+        if self.refine_iters:
+            query_mask[torch.triu(torch.ones(num_steps, num_steps, dtype=torch.bool, device=self._device), 2)] = 0
+            bos = torch.full((bs, 1), tokenizer.sos_id, dtype=torch.long, device=self._device)
+            for i in range(self.refine_iters):
+                tgt_in = torch.cat([bos, logits[:, :-1].argmax(-1)], dim=1)
+                tgt_padding_mask = (tgt_in == tokenizer.eos_id).int().cumsum(-1) > 0
+                tgt_out = self.decode(
+                    tgt_in, memory, tgt_mask, tgt_padding_mask, pos_queries, query_mask[:, : tgt_in.shape[1]])
+                logits = self.head(tgt_out)
+
+        return logits
+
+
+
+class System(pl.LightningModule):
+
+    def __init__(self, config):
+
+        super().__init__()
+        self.save_hyperparameters()
+        self.max_len = int(config['model']['max_len'])
+        self.charset_adapter = CharsetAdapter()
+        self.charset = config['model']['train_charset']
+        self.lr = float(config['trainer']['lr'])
+        self.batch_size = config['trainer']['batch_size']
+        self.warm_pct = float(config['trainer']['warm_pct'])
+        self.weight_decay = float(config['trainer']['weight_decay'])
+        self.tokenizer = Tokenizer(self.charset, self.max_len)
+        self.sos_id = self.tokenizer.sos_id
+        self.eos_id = self.tokenizer.eos_id
+        self.pad_id = self.tokenizer.pad_id
+
+        self.model = PARSeq(config)
+        self.rng = np.random.default_rng()
+        self.max_gen_perms = config['model']['perm_num'] // 2 if config['model']['perm_mirrored'] else config['model']['perm_num']
+        self.perm_forward = config['model']['perm_forward']
+        self.perm_mirrored = config['model']['perm_mirrored']
+        if config['model']['pretrained']:
+            self.weight_url = config['model']['weight_url']
+            self.load_weight(self.weight_url)
+        self.set_seed()
+
+    def set_seed(self, seed=42):
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed(seed)
+            torch.cuda.manual_seed_all(seed)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+
+
+    def configure_optimizers(self):
+        agb = self.trainer.accumulate_grad_batches
+        lr_scale = agb * math.sqrt(self.trainer.num_devices) * self.batch_size / 256.0
+        lr = float(lr_scale) * float(self.lr)
+        optim = create_optimizer_v2(self, 'adamw', lr, self.weight_decay)
+        sched = OneCycleLR(
+            optim, lr, self.trainer.estimated_stepping_batches, pct_start=self.warm_pct, cycle_momentum=False
+        )
+        return {'optimizer': optim, 'lr_scheduler': {'scheduler': sched, 'interval': 'step'}}
+
+    def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimizer: Optimizer) -> None:
+        optimizer.zero_grad(set_to_none=True)
+
+    def forward(self, images, max_length=None):
+        return self.model.forward(self.tokenizer, images, max_length)
+
+    def gen_tgt_perms(self, tgt):
+        max_num_chars = tgt.shape[1] - 2
+        if max_num_chars == 1:
+            return torch.arange(3, device=self._device).unsqueeze(0)
+        perms = [torch.arange(max_num_chars, device=self._device)] if self.perm_forward else []
+        max_perms = math.factorial(max_num_chars)
+        if self.perm_mirrored:
+            max_perms //= 2
+        num_gen_perms = min(self.max_gen_perms, max_perms)
+
+        if max_num_chars < 5:
+
+            if max_num_chars == 4 and self.perm_mirrored:
+                selector = [0, 3, 4, 6, 9, 10, 12, 16, 17, 18, 19, 21]
+            else:
+                selector = list(range(max_perms))
+            perm_pool = torch.as_tensor(
+                list(permutations(range(max_num_chars), max_num_chars)),
+                device=self._device,
+            )[selector]
+            if self.perm_forward:
+                perm_pool = perm_pool[1:]
+            perms = torch.stack(perms)
+            if len(perm_pool):
+                i = self.rng.choice(len(perm_pool), size=num_gen_perms - len(perms), replace=False)
+                perms = torch.cat([perms, perm_pool[i]])
+        else:
+            perms.extend(
+                [torch.randperm(max_num_chars, device=self._device) for _ in range(num_gen_perms - len(perms))]
+            )
+            perms = torch.stack(perms)
+        if self.perm_mirrored:
+            comp = perms.flip(-1)
+            perms = torch.stack([perms, comp]).transpose(0, 1).reshape(-1, max_num_chars)
+        sos_idx = perms.new_zeros((len(perms), 1))
+        eos_idx = perms.new_full((len(perms), 1), max_num_chars + 1)
+        perms = torch.cat([sos_idx, perms + 1, eos_idx], dim=1)
+        if len(perms) > 1:
+            perms[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=self._device)
+        return perms
+
+    def generate_attn_masks(self, perm):
+        sz = perm.shape[0]
+        mask = torch.zeros((sz, sz), dtype=torch.bool, device=self._device)
+        for i in range(sz):
+            query_idx = perm[i]
+            masked_keys = perm[i + 1 :]
+            mask[query_idx, masked_keys] = True
+        content_mask = mask[:-1, :-1].clone()
+        mask[torch.eye(sz, dtype=torch.bool, device=self._device)] = True  # mask "self"
+        query_mask = mask[1:, :-1]
+        return content_mask, query_mask
+
+    def training_step(self, batch, batch_idx):
+        images, labels = batch
+        images = images.to(device)
+        tgt = labels.to(device)
+
+        memory = self.model.encode(images.to(device))
+        tgt_perms = self.gen_tgt_perms(tgt)
+        tgt_in = tgt[:, :-1]
+        tgt_out = tgt[:, 1:]
+        tgt_padding_mask = (tgt_in == self.pad_id) | (tgt_in == self.eos_id)
+
+        loss = 0
+        loss_numel = 0
+        n = (tgt_out != self.pad_id).sum().item()
+        for i, perm in enumerate(tgt_perms):
+            tgt_mask, query_mask = self.generate_attn_masks(perm)
+            out = self.model.decode(tgt_in, memory, tgt_mask, tgt_padding_mask, tgt_query_mask=query_mask)
+            logits = self.model.head(out).flatten(end_dim=1)
+            loss += n * F.cross_entropy(logits, tgt_out.flatten(), ignore_index=self.pad_id)
+            loss_numel += n
+            if i == 1:
+                tgt_out = torch.where(tgt_out == self.eos_id, self.pad_id, tgt_out)
+                n = (tgt_out != self.pad_id).sum().item()
+
+        loss /= loss_numel
+        with torch.no_grad():
+            self.eval()
+            logits, _, _ = self.forward_logits_loss(images, labels)
+            predicted_labels, _ = self.tokenizer.decode(logits.softmax(-1))
+            predicted_labels = [self.charset_adapter(label) for label in predicted_labels]
+            true_labels = self.decode(labels)
+            count = 0
+            for i in range(len(true_labels)):
+                if true_labels[i] == predicted_labels[i]:
+                    count += 1
+            train_acc = float(count / len(true_labels))
+        self.train()  # restore training mode after the eval-mode accuracy pass
+        self.log("train_loss", loss, on_epoch=True, prog_bar=True, logger=True)
+        self.log("train_acc", train_acc, on_epoch=True, prog_bar=True, logger=True)
+        return loss
+
+    def forward_logits_loss(self, images, targets: torch.Tensor):
+        targets = targets[:, 1:]
+        max_len = targets.shape[1] - 1
+        logits = self.forward(images, max_len)
+        loss = F.cross_entropy(logits.flatten(end_dim=1), targets.flatten(), ignore_index=self.pad_id)
+        loss_numel = (targets != self.pad_id).sum()
+        return logits, loss, loss_numel
+
+    def validation_step(self, batch, batch_idx):
+        self.eval()
+        images, labels = batch
+        with torch.no_grad():
+            logits, loss, loss_numel = self.forward_logits_loss(images, labels)
+            predicted_labels, _ = self.tokenizer.decode(logits.softmax(-1))
+            predicted_labels = [self.charset_adapter(label) for label in predicted_labels]
+            true_labels = self.decode(labels)
+            count = 0
+            for i in range(len(true_labels)):
+                if true_labels[i] == predicted_labels[i]:
+                    count += 1
+            val_acc = float(count / len(true_labels))
+        self.log("val_loss", loss / loss_numel, on_epoch=True, prog_bar=True, logger=True)
+        self.log("val_acc", val_acc, on_epoch=True, prog_bar=True, logger=True)
+
+    def on_train_epoch_end(self):
+        train_loss = self.trainer.callback_metrics["train_loss"].item()
+        train_acc = self.trainer.callback_metrics["train_acc"].item()
+        val_loss = self.trainer.callback_metrics["val_loss"].item()
+        val_acc = self.trainer.callback_metrics["val_acc"].item()
+        combined_acc = val_acc + 1e-1 * train_acc
+        self.log("combined_acc", combined_acc, prog_bar=False, logger=True)
+        print(f"Epoch {self.current_epoch}: train_loss = {train_loss:.3f}, train_acc = {train_acc:.3f}, val_loss = {val_loss:.3f}, val_acc = {val_acc:.3f}")
+
+    def load_weight(self, url):
+        state_dict = torch.hub.load_state_dict_from_url(url=url, map_location=device, check_hash=True)
+        self.model.load_state_dict(state_dict)
+        print("Loaded weights successfully!")
+
+    def decode(self, ids):
+        true_labels = []
+        if isinstance(ids, torch.Tensor):
+            ids = ids.tolist()
+        for label in ids:
+            true_label = self.tokenizer._ids2tok(label)
+            true_labels.append(self.charset_adapter(true_label))
+        return true_labels
parseq/utils.py
ADDED
@@ -0,0 +1,113 @@
+import re
+import torch
+from torch import Tensor
+import torch.nn as nn
+from typing import Sequence
+class CharsetAdapter:
+
+    def __init__(self):
+        super().__init__()
+        self.charset = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        self.unsupported = re.compile(f'[^{re.escape(self.charset)}]')
+
+    def __call__(self, label):
+        label = label.upper()
+        label = self.unsupported.sub('', label)
+        return label
+
+
+class Vocab:
+    def __init__(self, charset):
+        self.c2i = dict()
+        self.c2i['<EOS>'] = 0
+        count = 1
+        for c in charset:
+            if c not in self.c2i.keys():
+                self.c2i[c] = count
+                count += 1
+        self.c2i['<SOS>'] = len(self.c2i)
+        self.c2i['<PAD>'] = len(self.c2i)
+
+        self.i2c = {v: k for k, v in self.c2i.items()}
+
+    def __len__(self):
+        return len(self.c2i)
+
+
+class Tokenizer:
+    def __init__(self, charset, max_len):
+
+        self.max_len = max_len
+        self.vocab = Vocab(charset)
+        self.sos_id = self.vocab.c2i['<SOS>']
+        self.eos_id = self.vocab.c2i['<EOS>']
+        self.pad_id = self.vocab.c2i['<PAD>']
+        self.special = [self.sos_id, self.eos_id, self.pad_id]
+
+    def __len__(self):
+        return len(self.vocab)
+
+    def _tok2ids(self, token: str):
+        return [self.vocab.c2i[c] for c in token]
+
+    def _ids2tok(self, token_ids, join=True):
+        if isinstance(token_ids, torch.Tensor):
+            token_ids = token_ids.tolist()
+        token = [self.vocab.i2c[i] for i in token_ids if i not in self.special]
+        return ''.join(token) if join else token
+
+
+
+    def encode_batch(self, labels: list[str], device):
+        encoded_labels = []
+        for label in labels:
+            encoded_label = [self.sos_id] + self._tok2ids(label) + [self.eos_id]
+            if len(encoded_label) > self.max_len:
+                encoded_label = encoded_label[: self.max_len]
+            else:
+                encoded_label = encoded_label + [self.pad_id] * (self.max_len - len(encoded_label))
+            encoded_labels.append(torch.tensor(encoded_label, dtype=torch.long, device=device))
+        return torch.stack(encoded_labels, dim=0)
+
+    def _filter(self, probs: Tensor, ids: Tensor):
+        ids = ids.tolist()
+        try:
+            eos_idx = ids.index(self.eos_id)
+        except ValueError:
+            eos_idx = len(ids)
+        ids = ids[: eos_idx]
+        probs = probs[: eos_idx + 1]
+        return probs, ids
+
+    def decode(self, token_dists: Tensor, raw: bool = False):
+
+        batch_tokens = []
+        batch_probs = []
+        for dist in token_dists:
+            probs, ids = dist.max(-1)
+            if not raw:
+                probs, ids = self._filter(probs, ids)
+            tokens = self._ids2tok(ids, not raw)
+            batch_tokens.append(tokens)
+            batch_probs.append(probs)
+        return batch_tokens, batch_probs
+
+
+def init_weights(module: nn.Module, name: str = '', exclude: Sequence[str] = ()):
+    if any(map(name.startswith, exclude)):
+        return
+    if isinstance(module, nn.Linear):
+        nn.init.trunc_normal_(module.weight, std=0.02)
+        if module.bias is not None:
+            nn.init.zeros_(module.bias)
+    elif isinstance(module, nn.Embedding):
+        nn.init.trunc_normal_(module.weight, std=0.02)
+        if module.padding_idx is not None:
+            module.weight.data[module.padding_idx].zero_()
+    elif isinstance(module, nn.Conv2d):
+        nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
+        if module.bias is not None:
+            nn.init.zeros_(module.bias)
+    elif isinstance(module, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm)):
+        nn.init.ones_(module.weight)
+        nn.init.zeros_(module.bias)
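A round-trip sketch for the tokenizer; the plate string is made up, and the charset here is just the plate alphabet rather than the full train_charset from config.yaml:

from parseq.utils import Tokenizer, CharsetAdapter

tok = Tokenizer('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ', max_len=25)

ids = tok.encode_batch(['51F12345'], device='cpu')   # (1, 25): <SOS>, char ids, <EOS>, then <PAD>
print(ids.shape)                                      # torch.Size([1, 25])
print(tok._ids2tok(ids[0]))                           # '51F12345', special tokens filtered out
print(CharsetAdapter()('51f-123.45'))                 # '51F12345', uppercased, unsupported chars stripped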
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+tensorflow==2.13.1
+torch==2.4.1
+gradio==4.44.1
+timm==1.0.9
+Pillow==10.2.0
+imgaug==0.4.0
+opencv-python==4.10.0
+torchvision==0.19.1
wpodnet/__init__.py
ADDED
@@ -0,0 +1,7 @@
+__version__ = '1.0.3'
+
+from .backend import Prediction, Predictor
+
+__all__ = [
+    'Prediction', 'Predictor'
+]
wpodnet/backend.py
ADDED
@@ -0,0 +1,157 @@
+from typing import List, Tuple
+
+import numpy as np
+import torch
+from PIL import Image, ImageDraw
+from torchvision.transforms.functional import to_tensor
+import cv2
+from .model import WPODNet
+
+
+class Prediction:
+    def __init__(self, image: Image.Image, bounds: np.ndarray, confidence: float):
+        self.image = image
+        self.bounds = bounds
+        self.confidence = confidence
+
+    def _get_width_height(self):
+        def distance(point1, point2):
+            x1 = point1[0]
+            y1 = point1[1]
+            x2 = point2[0]
+            y2 = point2[1]
+            distance = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
+            return distance
+        box = self.bounds
+        dis1 = distance(box[0], box[1])
+        dis2 = distance(box[1], box[2])
+        dis3 = distance(box[2], box[3])
+        dis4 = distance(box[3], box[0])
+        width = (dis1 + dis3) / 2
+        height = (dis2 + dis4) / 2
+        if height / width > 0.49:
+            return 64, 46
+        return 100, 23
+    def get_perspective_M(self, width: int, height: int) -> List[float]:
+        # Get the perspective matrix
+        src_points = np.array(self.bounds, dtype=np.float32)
+        dst_points = np.array([[0, 0], [width, 0], [width, height], [0, height]], np.float32)
+        return cv2.getPerspectiveTransform(src_points, dst_points)
+    def annotate(self, outline: str = 'red', width: int = 3) -> Image.Image:
+        canvas = self.image.copy()
+        drawer = ImageDraw.Draw(canvas)
+        drawer.polygon(
+            [(x, y) for x, y in self.bounds],
+            outline=outline,
+            width=width
+        )
+        return canvas
+
+    def warp(self):
+        # Get the perspective matrix
+        width, height = self._get_width_height()
+
+        M = self.get_perspective_M(width, height)
+
+        n_image = np.array(self.image)
+        warped = cv2.warpPerspective(n_image, M, (int(width), int(height)))
+        return warped
+
+
+class Predictor:
+    _q = np.array([
+        [-.5, .5, .5, -.5],
+        [-.5, -.5, .5, .5],
+        [1., 1., 1., 1.]
+    ])
+    _scaling_const = 7.75
+    _stride = 16
+
+    def __init__(self, wpodnet: WPODNet):
+        self.wpodnet = wpodnet
+        self.wpodnet.eval()
+
+    def _resize_to_fixed_ratio(self, image: Image.Image, dim_min: int, dim_max: int) -> Image.Image:
+        h, w = image.height, image.width
+
+        wh_ratio = max(h, w) / min(h, w)
+        side = int(wh_ratio * dim_min)
+        bound_dim = min(side + side % self._stride, dim_max)
+
+        factor = bound_dim / max(h, w)
+        reg_w, reg_h = int(w * factor), int(h * factor)
+
+        # Ensure both width and height are multiples of `self._stride`
+        reg_w_mod = reg_w % self._stride
+        if reg_w_mod > 0:
+            reg_w += self._stride - reg_w_mod
+
+        reg_h_mod = reg_h % self._stride
+        if reg_h_mod > 0:
+            reg_h += self._stride - reg_h % self._stride
+
+        return image.resize((reg_w, reg_h))
+
+    def _to_torch_image(self, image: Image.Image) -> torch.Tensor:
+        tensor = to_tensor(image)
+        return tensor.unsqueeze_(0)
+
+    def _inference(self, image: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
+        with torch.no_grad():
+            probs, affines = self.wpodnet.forward(image)
+
+        # Convert to squeezed numpy arrays
+        # grid_w: the number of anchors per row
+        # grid_h: the number of anchors per column
+        probs = np.squeeze(probs.cpu().numpy())[0]  # (grid_h, grid_w)
+        affines = np.squeeze(affines.cpu().numpy())  # (6, grid_h, grid_w)
+
+        return probs, affines
+
+    def _get_max_anchor(self, probs: np.ndarray) -> Tuple[int, int]:
+        return np.unravel_index(probs.argmax(), probs.shape)
+
+    def _get_bounds(self, affines: np.ndarray, anchor_y: int, anchor_x: int, scaling_ratio: float = 1.0) -> np.ndarray:
+        # Compute theta
+        theta = affines[:, anchor_y, anchor_x]
+        theta = theta.reshape((2, 3))
+        theta[0, 0] = max(theta[0, 0], 0.0)
+        theta[1, 1] = max(theta[1, 1], 0.0)
+
+        # Convert theta into the bounding polygon
+        bounds = np.matmul(theta, self._q) * self._scaling_const * scaling_ratio
+
+        # Normalize the bounds
+        _, grid_h, grid_w = affines.shape
+        bounds[0] = (bounds[0] + anchor_x + .5) / grid_w
+        bounds[1] = (bounds[1] + anchor_y + .5) / grid_h
+
+        return np.transpose(bounds)
+
+    def predict(self, image: Image.Image, scaling_ratio: float = 1.0, dim_min: int = 288, dim_max: int = 608) -> Prediction:
+        orig_h, orig_w = image.height, image.width
+
+        # Resize the image to a fixed ratio
+        # This makes setting up the anchors convenient
+        resized = self._resize_to_fixed_ratio(image, dim_min=dim_min, dim_max=dim_max)
+        resized = self._to_torch_image(resized)
+        resized = resized.to(self.wpodnet.device)
+
+        # Inference with WPODNet
+        # probs: the probability distribution of the license plate location
+        # affines: the predicted affine matrices
+        probs, affines = self._inference(resized)
+
+        # Get the theta with maximum probability
+        max_prob = np.amax(probs)
+        anchor_y, anchor_x = self._get_max_anchor(probs)
+        bounds = self._get_bounds(affines, anchor_y, anchor_x, scaling_ratio)
+
+        bounds[:, 0] *= orig_w
+        bounds[:, 1] *= orig_h
+
+        return Prediction(
+            image=image,
+            bounds=bounds.astype(np.int32),
+            confidence=max_prob.item()
+        )
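backend.Predictor is the pure-PyTorch detection path; app.py instead goes through the Keras model in wpodnet/lib_detection.py. A usage sketch, assuming a WPODNet state dict converted to PyTorch is available at a hypothetical weights/wpodnet.pth and the input image exists locally:

import torch
from PIL import Image
from wpodnet.model import WPODNet
from wpodnet.backend import Predictor

net = WPODNet()
net.load_state_dict(torch.load('weights/wpodnet.pth', map_location='cpu'))  # hypothetical checkpoint
predictor = Predictor(net)

image = Image.open('car.jpg').convert('RGB')    # hypothetical test image
prediction = predictor.predict(image)
print(prediction.confidence)                    # best plate probability over the anchor grid
print(prediction.bounds)                        # 4x2 polygon in pixel coordinates
plate = prediction.warp()                       # rectified plate crop as a numpy array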
wpodnet/lib_detection.py
ADDED
@@ -0,0 +1,265 @@
+# pylint: disable=invalid-name, redefined-outer-name, missing-docstring, non-parent-init-called, trailing-whitespace, line-too-long
+from os.path import splitext
+import cv2
+import numpy as np
+from keras.models import load_model
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+class Label:
+    def __init__(self, cl=-1, tl=np.array([0., 0.]), br=np.array([0., 0.]), prob=None):
+        self.__tl = tl
+        self.__br = br
+        self.__cl = cl
+        self.__prob = prob
+
+    def __str__(self):
+        return 'Class: %d, top left(x: %f, y: %f), bottom right(x: %f, y: %f)' % (
+            self.__cl, self.__tl[0], self.__tl[1], self.__br[0], self.__br[1])
+
+    def copy(self):
+        return Label(self.__cl, self.__tl, self.__br)
+
+    def wh(self): return self.__br - self.__tl
+
+    def cc(self): return self.__tl + self.wh() / 2
+
+    def tl(self): return self.__tl
+
+    def br(self): return self.__br
+
+    def tr(self): return np.array([self.__br[0], self.__tl[1]])
+
+    def bl(self): return np.array([self.__tl[0], self.__br[1]])
+
+    def cl(self): return self.__cl
+
+    def area(self): return np.prod(self.wh())
+
+    def prob(self): return self.__prob
+
+    def set_class(self, cl):
+        self.__cl = cl
+
+    def set_tl(self, tl):
+        self.__tl = tl
+
+    def set_br(self, br):
+        self.__br = br
+
+    def set_wh(self, wh):
+        cc = self.cc()
+        self.__tl = cc - .5 * wh
+        self.__br = cc + .5 * wh
+
+    def set_prob(self, prob):
+        self.__prob = prob
+
+class DLabel(Label):
+    def __init__(self, cl, pts, prob):
+        self.pts = pts
+        tl = np.amin(pts, axis=1)
+        br = np.amax(pts, axis=1)
+        Label.__init__(self, cl, tl, br, prob)
+
+# Normalize the image to [0, 1]
+def im2single(Image):
+    return Image.astype('float32') / 255
+
+def getWH(shape):
+    return np.array(shape[1::-1]).astype(float)
+
+def IOU(tl1, br1, tl2, br2):
+    wh1, wh2 = br1-tl1, br2-tl2
+    assert((wh1 >= 0).all() and (wh2 >= 0).all())
+
+    intersection_wh = np.maximum(np.minimum(br1, br2) - np.maximum(tl1, tl2), 0)
+    intersection_area = np.prod(intersection_wh)
+    area1, area2 = (np.prod(wh1), np.prod(wh2))
+    union_area = area1 + area2 - intersection_area
+    return intersection_area/union_area
+
+def IOU_labels(l1, l2):
+    return IOU(l1.tl(), l1.br(), l2.tl(), l2.br())
+
+def nms(Labels, iou_threshold=0.5):
+    SelectedLabels = []
+    Labels.sort(key=lambda l: l.prob(), reverse=True)
+
+    for label in Labels:
+        non_overlap = True
+        for sel_label in SelectedLabels:
+            if IOU_labels(label, sel_label) > iou_threshold:
+                non_overlap = False
+                break
+
+        if non_overlap:
+            SelectedLabels.append(label)
+    return SelectedLabels
+
+def load_model_wpod(path):
+    model = load_model(path)
+    return model
+
+def find_T_matrix(pts, t_pts):
+    A = np.zeros((8, 9))
+    for i in range(0, 4):
+        xi = pts[:, i]
+        xil = t_pts[:, i]
+        xi = xi.T
+
+        A[i*2, 3:6] = -xil[2]*xi
+        A[i*2, 6:] = xil[1]*xi
+        A[i*2+1, :3] = xil[2]*xi
+        A[i*2+1, 6:] = -xil[0]*xi
+
+    [U, S, V] = np.linalg.svd(A)
+    H = V[-1, :].reshape((3, 3))
+    return H
+
+def getRectPts(a, b):
+    return np.array([[0, 0], [a, 0], [a, b], [0, b]], np.float32)
+
+def normal(pts, side, mn, MN):
+    pts_MN_center_mn = pts * side
+    pts_MN = pts_MN_center_mn + mn.reshape((2, 1))
+    pts_prop = pts_MN / MN.reshape((2, 1))
+    return pts_prop
+def get_bound(x, y):
+    bound = []
+    for i in range(0, len(x)):
+        point = [x[i], y[i]]
+        bound.append(point)
+    return bound
+def calculate_ratio(bound):
+    def distance(point1, point2):
+        x1 = point1[0]
+        y1 = point1[1]
+        x2 = point2[0]
+        y2 = point2[1]
+        distance = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
+        return distance
+    box = bound
+    dis1 = distance(box[0], box[1])
+    dis2 = distance(box[1], box[2])
+    dis3 = distance(box[2], box[3])
+    dis4 = distance(box[3], box[0])
+    width = (dis1 + dis3) / 2
+    height = (dis2 + dis4) / 2
+    ratio = height / width
+    if ratio > 0.55:
+        return 2
+    return 1
+# Reconstruct plates from the predicted values: crop the plates out of the source image and build the labels
+def reconstruct(I, Iresized, Yr, lp_threshold):
+    bounds = []
+    # 4 max-pooling layers, stride = 2
+    net_stride = 2**4
+    side = ((208 + 40)/2)/net_stride
+
+    # one line and two lines license plate size
+    one_line = (100, 23)
+    two_lines = (64, 46)
+
+    Probs = Yr[..., 0]
+    Affines = Yr[..., 2:]
+
+    xx, yy = np.where(Probs > lp_threshold)
+    # CNN input image size
+    WH = getWH(Iresized.shape)
+    # output feature map size
+    MN = WH/net_stride
+
+    vxx = vyy = 0.5  # alpha
+    base = lambda vx, vy: np.matrix([[-vx, -vy, 1], [vx, -vy, 1], [vx, vy, 1], [-vx, vy, 1]]).T
+    labels = []
+    labels_frontal = []
+
+    for i in range(len(xx)):
+        x, y = xx[i], yy[i]
+        affine = Affines[x, y]
+        prob = Probs[x, y]
+
+        mn = np.array([float(y) + 0.5, float(x) + 0.5])
+
+        # affine transformation matrix
+        A = np.reshape(affine, (2, 3))
+        A[0, 0] = max(A[0, 0], 0)
+        A[1, 1] = max(A[1, 1], 0)
+        # identity transformation
+        B = np.zeros((2, 3))
+        B[0, 0] = max(A[0, 0], 0)
+        B[1, 1] = max(A[1, 1], 0)
+
+        pts = np.array(A*base(vxx, vyy))
+        pts_frontal = np.array(B*base(vxx, vyy))
+
+        pts_prop = normal(pts, side, mn, MN)
+        frontal = normal(pts_frontal, side, mn, MN)
+
+        labels.append(DLabel(0, pts_prop, prob))
+        labels_frontal.append(DLabel(0, frontal, prob))
+
+    final_labels = nms(labels, 0.1)
+    final_labels_frontal = nms(labels_frontal, 0.1)
+    if (len(final_labels_frontal) > 0):
+
+
+        # LP size and type
+        #out_size, lp_type = (two_lines, 2) if ((final_labels_frontal[0].wh()[1] / final_labels_frontal[0].wh()[1]) > 0.49) else (one_line, 1)
+        lp_type = 0
+        TLp = []
+        if len(final_labels):
+            final_labels.sort(key=lambda x: x.prob(), reverse=True)
+            for _, label in enumerate(final_labels):
+                ptsh = np.concatenate((label.pts * getWH(I.shape).reshape((2, 1)), np.ones((1, 4))))
+                bound = get_bound(ptsh[0], ptsh[1])
+                pts = np.array(bound, dtype=np.float32)
+                bounds.append(bound)
+                lp_type = calculate_ratio(bound)
+                if lp_type == 2:
+                    out_size = two_lines
+                else: out_size = one_line
+                t_ptsh = getRectPts(out_size[0], out_size[1])
+                H = cv2.getPerspectiveTransform(pts, t_ptsh)
+                Ilp = cv2.warpPerspective(I, H, (int(out_size[0]), int(out_size[1])))
+                TLp.append(Ilp)
+
+        return final_labels, TLp, lp_type, bounds
+    else:
+        return None, [], None, None
+def detect_lp(model, I, lp_threshold):
+    Dmax = 350
+    Dmin = 288
+
+    # Get the ratio between W and H of the image and derive the bound on its smaller side
+    ratio = float(max(I.shape[:2])) / min(I.shape[:2])
+    side = int(ratio * Dmin)
+    max_dim = min(side, Dmax)
+    I = im2single(I)
+    # Compute the resize factor
+    min_dim_img = min(I.shape[:2])
+    factor = float(max_dim) / min_dim_img
+
+    # Compute the new W and H after resizing
+    w, h = (np.array(I.shape[1::-1], dtype=float) * factor).astype(int).tolist()
+
+    # Resize the image
+    Iresized = cv2.resize(I, (w, h))
+
+    T = Iresized.copy()
+
+    # Reshape into a batch of one
+    T = T.reshape((1, T.shape[0], T.shape[1], T.shape[2]))
+
+    # Detect the plate with the pretrained WPOD-Net
+    Yr = model.predict(T, verbose=0)
+
+    # Remove the size-1 dimensions of Yr
+    Yr = np.squeeze(Yr)
+
+
+
+    # Reconstruct and return: labels, plate images, plate type (1: one-line, 2: square), bounds
+    L, TLp, lp_type, bounds = reconstruct(I, Iresized, Yr, lp_threshold)
+    return L, TLp, lp_type, bounds
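The Keras path used by app.py, reduced to the essentials; the image path is hypothetical and weights/wpod-net.h5 has to be downloaded separately since weights/ is git-ignored:

import cv2
from wpodnet.lib_detection import load_model_wpod, detect_lp

wpod_net = load_model_wpod('weights/wpod-net.h5')
frame = cv2.imread('car.jpg')                          # hypothetical test image
labels, plates, lp_type, bounds = detect_lp(wpod_net, frame, 0.5)
if plates:                                             # empty list when nothing clears the threshold
    crop = (plates[0] * 255).astype('uint8')           # detect_lp works on [0, 1] float images
    cv2.imwrite('plate_crop.jpg', crop)
    print('plate type:', lp_type)                      # 1: one-line plate, 2: two-line (square) plate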
wpodnet/model.py
ADDED
@@ -0,0 +1,73 @@
+import torch
+import torch.nn as nn
+
+
+class BasicConvBlock(nn.Module):
+    def __init__(self, in_channels: int, out_channels: int):
+        super(BasicConvBlock, self).__init__()
+        self.conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
+        self.bn_layer = nn.BatchNorm2d(out_channels, momentum=0.99, eps=0.001)
+        self.act_layer = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv_layer(x)
+        x = self.bn_layer(x)
+        return self.act_layer(x)
+
+
+class ResBlock(nn.Module):
+    def __init__(self, channels: int):
+        super(ResBlock, self).__init__()
+        self.conv_block = BasicConvBlock(channels, channels)
+        self.sec_layer = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
+        self.bn_layer = nn.BatchNorm2d(channels, momentum=0.99, eps=0.001)
+        self.act_layer = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        h = self.conv_block(x)
+        h = self.sec_layer(h)
+        h = self.bn_layer(h)
+        return self.act_layer(x + h)
+
+
+class WPODNet(nn.Module):
+    def __init__(self):
+        super(WPODNet, self).__init__()
+        self.backbone = nn.Sequential(
+            BasicConvBlock(3, 16),
+            BasicConvBlock(16, 16),
+            nn.MaxPool2d(2),
+            BasicConvBlock(16, 32),
+            ResBlock(32),
+            nn.MaxPool2d(2),
+            BasicConvBlock(32, 64),
+            ResBlock(64),
+            ResBlock(64),
+            nn.MaxPool2d(2),
+            BasicConvBlock(64, 64),
+            ResBlock(64),
+            ResBlock(64),
+            nn.MaxPool2d(2),
+            BasicConvBlock(64, 128),
+            ResBlock(128),
+            ResBlock(128),
+            ResBlock(128),
+            ResBlock(128)
+        )
+        self.prob_layer = nn.Conv2d(128, 2, kernel_size=3, padding=1)
+        self.bbox_layer = nn.Conv2d(128, 6, kernel_size=3, padding=1)
+
+        # Register a dummy buffer to retrieve the device the module is attached to
+        self.register_buffer('dummy', torch.Tensor(), persistent=False)
+
+    @property
+    def device(self) -> torch.device:
+        return self.dummy.device
+
+    def forward(self, image: torch.Tensor):
+        feature: torch.Tensor = self.backbone(image)
+        probs: torch.Tensor = self.prob_layer(feature)
+        probs = torch.softmax(probs, dim=1)
+        affines: torch.Tensor = self.bbox_layer(feature)
+
+        return probs, affines
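A quick shape check: the backbone has four stride-2 max-pools, so the total stride is 16 and a 224x224 input yields a 14x14 anchor grid (matching Predictor._stride = 16 in wpodnet/backend.py):

import torch
from wpodnet.model import WPODNet

net = WPODNet().eval()
with torch.no_grad():
    probs, affines = net(torch.rand(1, 3, 224, 224))
print(probs.shape)     # torch.Size([1, 2, 14, 14]), softmaxed plate / non-plate scores per anchor
print(affines.shape)   # torch.Size([1, 6, 14, 14]), the 2x3 affine parameters per anchor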
wpodnet/stream.py
ADDED
@@ -0,0 +1,36 @@
+from pathlib import Path
+from typing import Generator, Union
+
+from PIL import Image
+
+
+class ImageStreamer:
+    def __init__(self, image_or_folder: Union[str, Path]):
+        path = Path(image_or_folder)
+        self.generator = self._get_image_generator(path)
+
+    def _get_image_generator(self, path: Path) -> Generator[Image.Image, None, None]:
+        if path.is_file():
+            image_paths = [path] if self._is_image_file(path) else []
+        elif path.is_dir():
+            image_paths = [
+                p
+                for p in path.rglob('**/*')
+                if self._is_image_file(p)
+            ]
+        else:
+            raise TypeError(f'Invalid path to images {path}')
+
+        for p in image_paths:
+            yield Image.open(p)
+
+    def _is_image_file(self, path: Path) -> bool:
+        try:
+            image = Image.open(path)
+            image.verify()
+            return True
+        except Exception:
+            return False
+
+    def __iter__(self):
+        return self.generator
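ImageStreamer is not used by app.py, but it gives a lazy iterator over every readable image under a path. A short sketch; the Test image folder is git-ignored, so it has to exist locally:

from wpodnet.stream import ImageStreamer

for image in ImageStreamer('Test image'):      # also accepts a single image path
    print(image.filename, image.size)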