# Spaces: SakshiRathi77 / plano_lit
# File size: 7,578 Bytes
# 6ef9ea4

import glob
import json
import os
import xml.etree.ElementTree as ET

import cv2

# from sklearn.externals import joblib
import joblib
import numpy as np
import pandas as pd

# from .variables import old_ocr_req_cols
# from .skew_correction import  PageSkewWraper

# NOTE(review): neither constant is referenced in the visible part of this
# file. const_HW looks like a height/width aspect ratio (about 22/17) and
# const_W like a target width in pixels -- confirm against the callers.
const_HW = 1.294117647
const_W = 600
# https://www.forbes.com/sites/forbestechcouncil/2020/06/02/leveraging-technologies-to-align-realograms-and-planograms-for-grocery/?sh=506b8b78e86c


# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# http://devdoc.net/linux/OpenCV-3.2.0/da/d0c/tutorial_bounding_rects_circles.html
# https://stackoverflow.com/questions/10297713/find-contour-of-the-set-of-points-in-opencv
# https://stackoverflow.com/questions/16538774/dealing-with-contours-and-bounding-rectangle-in-opencv-2-4-python-2-7
# https://stackoverflow.com/questions/50308055/creating-bounding-boxes-for-contours
# https://stackoverflow.com/questions/57296398/how-can-i-get-better-results-of-bounding-box-using-find-contours-of-opencv
# http://amroamroamro.github.io/mexopencv/opencv/generalContours_demo1.html
# https://gist.github.com/bigsnarfdude/d811e31ee17495f82f10db12651ae82d
# http://man.hubwiz.com/docset/OpenCV.docset/Contents/Resources/Documents/da/d0c/tutorial_bounding_rects_circles.html
# https://www.analyticsvidhya.com/blog/2021/05/document-layout-detection-and-ocr-with-detectron2/
# https://colab.research.google.com/drive/1m6gaQF6Q4M0IaSjoo_4jWllKJjK-i6fw?usp=sharing#scrollTo=lEyl3wYKHAe1
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# https://docs.opencv.org/2.4/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html
# https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/


def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Group boxes into horizontal lines and number the lines from 1.

    Rows are scanned in the order given, so ``df`` is expected to already be
    sorted by ``ymin_col``. The first unassigned row opens a new line; every
    following row whose ``ymin`` is strictly less than that opening row's
    ``ymax`` joins the same line, and the first row that does not opens the
    next line.

    Args:
        df: DataFrame holding at least the columns named in ``colmn``.
            It is not modified.
        colmn: Names of the columns to carry into the result; must contain
            ``ymax_col`` and ``ymin_col``. The list is not modified.
        ymax_col: Name of the column with each box's bottom edge.
        ymin_col: Name of the column with each box's top edge.

    Returns:
        A new DataFrame with the ``colmn`` columns followed by
        ``line_number``, rows in the same order as ``df`` and a fresh
        default index. The values pass through a single NumPy array, so
        mixed numeric columns come back with one common dtype.

    Raises:
        ValueError: If ``ymax_col`` or ``ymin_col`` is not listed in ``colmn``.
        KeyError: If a column named in ``colmn`` is missing from ``df``.
    """
    # Work on private copies: neither the caller's list nor the caller's
    # frame should gain a "line_number" entry as a side effect.
    columns = list(colmn) + ["line_number"]
    # An explicit copy guarantees a writable array even when pandas would
    # otherwise hand back a read-only view (copy-on-write mode).
    array_value = df.assign(line_number=0)[columns].to_numpy(copy=True)

    ymax = columns.index(ymax_col)
    ymin = columns.index(ymin_col)
    line_no = columns.index("line_number")

    total = len(array_value)
    start_index = 0
    line_counter = 0
    while start_index < total:
        line_counter += 1
        current_ymax = array_value[start_index, ymax]
        # The opening row always belongs to the line it opens. Without this,
        # a degenerate box (ymax <= ymin, or NaN) would never be consumed
        # and the scan would not terminate.
        array_value[start_index, line_no] = line_counter

        next_index = start_index + 1
        while next_index < total and current_ymax > array_value[next_index, ymin]:
            array_value[next_index, line_no] = line_counter
            next_index += 1
        start_index = next_index

    return pd.DataFrame(array_value, columns=columns)


def do_sorting(df):
    """Return the boxes in reading order: line by line, left to right.

    The rows are ordered by ``ymin`` then ``xmin``, grouped into lines with
    ``bucket_sort``, and finally ordered by ``line_number`` then ``xmin``.

    Args:
        df: DataFrame with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
            An existing ``line_number`` column is discarded and recomputed.
            The frame passed in is not modified.

    Returns:
        A new DataFrame with the input's columns plus ``idx`` (each row's
        original index label) and ``line_number`` (1-based), with a fresh
        default index.

    Side effects:
        Prints ``"line number removed"`` when a stale ``line_number`` column
        is dropped.
    """
    # sort_values without inplace returns a new frame, so the caller's data
    # keeps its order and columns.
    ordered = df.sort_values(["ymin", "xmin"], ascending=True)
    ordered["idx"] = ordered.index
    if "line_number" in ordered.columns:
        print("line number removed")
        ordered = ordered.drop("line_number", axis=1)

    req_colns = ["xmin", "ymin", "xmax", "ymax", "idx"]
    # Hand bucket_sort its own copy: it may add a column to what it receives.
    lines = bucket_sort(ordered[req_colns].copy(), req_colns)

    # bucket_sort returns one row per input row in the same order, so the
    # line numbers are attached by position. Joining on "idx" instead would
    # multiply rows whenever the original index contains repeated labels.
    ordered = ordered.reset_index(drop=True)
    ordered["line_number"] = lines["line_number"].values

    ordered = ordered.sort_values(["line_number", "xmin"], ascending=True)
    return ordered.reset_index(drop=True)


def _voc_size_value(size, tag, position):
    """Read one integer dimension from a ``<size>`` element.

    The child named ``tag`` is preferred; if no such child exists, the child
    at ``position`` is used instead.
    """
    node = size.find(tag)
    if node is None:
        node = size[position]
    return int(node.text)


def xml_to_csv(xml_file):
    """Parse a Pascal VOC-style annotation file into a DataFrame.

    Despite the name, nothing is written to disk; the parsed rows are
    returned as a DataFrame.

    Adapted from
    https://gist.github.com/rotemtam/88d9a4efae243fc77ed4a0f9917c8f6c

    Args:
        xml_file: Path or file-like object accepted by
            ``xml.etree.ElementTree.parse``.

    Returns:
        A DataFrame with one row per ``<object>`` element and the columns
        ``filename``, ``width``, ``height``, ``cls``, ``xmin``, ``ymin``,
        ``xmax``, ``ymax``. It is empty (but has those columns) when the
        file contains no objects.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is not well formed.
        AttributeError, ValueError: If an expected element is missing or a
            numeric field does not hold an integer.
    """
    column_name = [
        "filename",
        "width",
        "height",
        "cls",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]
    root = ET.parse(xml_file).getroot()
    xml_list = []

    objects = root.findall("object")
    if objects:
        # File-level fields are the same for every object, so read them once.
        # They are only touched when there is at least one object, so an
        # annotation without objects still yields an empty frame even if it
        # lacks <filename> or <size>.
        filename = root.find("filename").text
        size = root.find("size")
        # Look the dimensions up by tag name so a <size> whose children are
        # not in width/height order does not silently swap the two values.
        width = _voc_size_value(size, "width", 0)
        height = _voc_size_value(size, "height", 1)

        for member in objects:
            bbx = member.find("bndbox")
            xml_list.append(
                (
                    filename,
                    width,
                    height,
                    member.find("name").text,
                    int(bbx.find("xmin").text),
                    int(bbx.find("ymin").text),
                    int(bbx.find("xmax").text),
                    int(bbx.find("ymax").text),
                )
            )

    return pd.DataFrame(xml_list, columns=column_name)


# def annotate_planogram_compliance(img0, sorted_xml_df, wrong_indexes, target_names):
#     # annotator = Annotator(img0, line_width=3, pil=True)
#     det = sorted_xml_df[['xmin', 'ymin', 'xmax', 'ymax','cls']].values
#     # det[:, :4] = scale_coords((640, 640), det[:, :4], img0.shape).round()
#     for i, (*xyxy, cls) in enumerate(det):

#         c = int(cls)  # integer class

#         if i in wrong_indexes:
#             # print(xyxy, "Wrong detection", (255, 0, 0))
#             label =  "Wrong detection"
#             color = (0,0,255)
#         else:
#             # print(xyxy, label, (0, 255, 0))
#             label = f'{target_names[c]}'
#             color = (0,255, 0)
#         org = (int(xyxy[0]), int(xyxy[1]) )
#         top_left = org
#         bottom_right = (int(xyxy[2]), int(xyxy[3]))
#         # print("#"*50)
#         # print(f"Anooatting cv2 rectangle with shape: { img0.shape}, top left: { top_left}, bottom right: { bottom_right} , color : { color },  thickness: {3}, cv2.LINE_8")
#         # print("#"*50)
#         cv2.rectangle(img0, top_left, bottom_right , color,  3, cv2.LINE_8)

#         cv2.putText(img0, label, tuple(org), cv2. FONT_HERSHEY_SIMPLEX  , 0.5, color)

#     return img0


def _draw_labelled_box(img0, row, target_names, color):
    """Draw one detection's rectangle and class label onto ``img0`` in place."""
    # OpenCV's Python bindings expect plain integer pixel coordinates, so the
    # values taken from the DataFrame row are cast explicitly.
    top_left = (int(row["xmin"]), int(row["ymin"]))
    bottom_right = (int(row["xmax"]), int(row["ymax"]))
    # Resolve the label before drawing so a bad class leaves the image
    # untouched for this detection.
    label = f'{target_names[row["cls"]]}'
    cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
    cv2.putText(img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color)


def annotate_planogram_compliance(
    img0, sorted_df, correct_indexes, wrong_indexes, target_names
):
    """Draw the detections selected by two index sets onto an image.

    Each index pair ``(x, y)`` selects the ``y``-th row among the rows of
    ``sorted_df`` whose ``line_number`` equals ``x + 1``. Rows picked by
    ``correct_indexes`` are drawn with colour ``(0, 255, 0)`` and rows picked
    by ``wrong_indexes`` with ``(0, 0, 255)`` (green and red respectively if
    ``img0`` is in OpenCV's usual BGR order). Both are labelled with
    ``target_names[row["cls"]]``.

    Drawing is best-effort: if a detection cannot be looked up or drawn, the
    error is printed and that detection is skipped.

    Args:
        img0: Image array to draw on; it is modified in place.
        sorted_df: DataFrame with ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``cls`` and ``line_number`` columns.
        correct_indexes: Pair of equally long sequences
            ``(line_indexes, position_indexes)``, zero-based -- presumably
            the output of ``numpy.where`` on a compliance matrix; confirm
            against the caller.
        wrong_indexes: Same layout as ``correct_indexes``.
        target_names: Mapping or sequence from class id to display name.

    Returns:
        ``img0``, the same object that was passed in.
    """
    passes = (
        (correct_indexes, (0, 255, 0)),
        (wrong_indexes, (0, 0, 255)),
    )
    for indexes, color in passes:
        for x, y in zip(*indexes):
            try:
                row = sorted_df[sorted_df["line_number"] == x + 1].iloc[y]
                _draw_labelled_box(img0, row, target_names, color)
            except Exception as e:
                # Deliberately broad: one bad detection must not abort the
                # annotation of the remaining ones.
                print("Error: " + str(e))
                continue

    return img0