import torch 
import torch.nn as nn
from parseq.system import System
import yaml
import cv2
from PIL import Image
from wpodnet.lib_detection import load_model_wpod, detect_lp
import numpy as np
import gradio as gr 
from torchvision import transforms as T
import matplotlib.pyplot as plt
# Preprocessing applied to each plate crop before recognition: bicubic resize
# to 224x224, conversion to a tensor, then normalisation with mean 0.5 and
# std 0.5.
trans = T.Compose([
    T.Resize((224, 224), interpolation=T.InterpolationMode.BICUBIC),
    T.ToTensor(),
    T.Normalize(mean=0.5, std=0.5),
])

# Run inference on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Relative paths to the model artefacts loaded below.
checkpoint_path = 'weights/best.ckpt'
config_path = 'parseq/config.yaml'
wpod_path = 'weights/wpod-net.h5'
# Detector model handed to detect_lp() inside predict(); loaded once at import.
wpod_net = load_model_wpod(wpod_path)

# Build the System from its YAML config, then restore its weights.
with open(config_path, 'r') as data:
    config = yaml.safe_load(data)
system = System(config)
# Deserialize the checkpoint onto the CPU first; the module itself is moved to
# `device` after the state dict has been loaded.
checkpoint = torch.load(checkpoint_path, map_location = 'cpu')
system.load_state_dict(checkpoint['state_dict'])
system.to(device)

def predict(image):
    """Detect plates in an image and draw the recognised text onto a copy.

    Args:
        image: Either a path to an image file (read with ``cv2.imread``) or
            an image array. ``None`` is accepted and yields ``None``.

    Returns:
        A copy of the input image. For every detected plate whose decoded
        text has at least five characters, the plate outline is drawn in
        green and the text is written at the top-left corner of its bounding
        polygon. When nothing is detected the unmodified copy is returned.
        ``None`` is returned when ``image`` is ``None``.

    Raises:
        ValueError: If ``image`` is a path that OpenCV cannot read.
    """
    if image is None:
        # Nothing to process (e.g. the form was submitted without an image).
        return None

    if isinstance(image, str):
        path = image
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image from path: {path!r}")

    draw_image = image.copy()
    _, plate_crops, _, bounding_boxes = detect_lp(wpod_net, image, 0.5)
    if len(plate_crops) == 0:
        return draw_image

    system.eval()
    # OpenCV drawing functions require 32-bit integer points; a plain ``int``
    # cast produces int64 on most platforms, which cv2.polylines rejects.
    bounding_boxes = np.array(bounding_boxes).astype(np.int32)

    for index, crop in enumerate(plate_crops):
        # Assumes detect_lp returns crops as floats in [0, 1] -- TODO confirm.
        plate = Image.fromarray((crop * 255).astype(np.uint8)).convert("RGB")
        # The model lives on `device`, so the input batch must be moved there
        # too; otherwise inference fails whenever CUDA is in use.
        batch = trans(plate).unsqueeze(0).to(device)
        with torch.no_grad():
            pred = system(batch).softmax(-1)
        generated_text, _ = system.tokenizer.decode(pred)

        if len(generated_text[0]) < 5:
            # Skip results with fewer than five characters.
            continue

        points = bounding_boxes[index]
        cv2.polylines(draw_image, [points], isClosed=True,
                      color=(0, 255, 0), thickness=2)
        # Plain Python ints keep cv2.putText happy across OpenCV versions.
        position = (int(points[:, 0].min()), int(points[:, 1].min()))
        cv2.putText(draw_image, generated_text[0], position,
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8,
                    color=(226, 218, 53), thickness=2)

    return draw_image

# Gradio front end: one image input is passed to predict() and the image it
# returns is shown as the single output.
interface = gr.Interface(
    fn=predict,
    inputs=[gr.components.Image()],
    outputs=[gr.components.Image()],
)
# Start the Gradio app with debug mode enabled.
interface.launch(debug=True)