SakshiRathi77 committed
Commit af98fd6
Parent(s): 8acc524

Upload 2 files

Files changed (2):
  1. app_utils.py +196 -0
  2. inference.py +226 -0
app_utils.py ADDED
@@ -0,0 +1,196 @@
+ import glob
+ import json
+ import os
+ import xml.etree.ElementTree as ET
+
+ import cv2
+
+ # from sklearn.externals import joblib
+ import joblib
+ import numpy as np
+ import pandas as pd
+
+ # from .variables import old_ocr_req_cols
+ # from .skew_correction import PageSkewWraper
+
+ const_HW = 1.294117647
+ const_W = 600
+ # https://www.forbes.com/sites/forbestechcouncil/2020/06/02/leveraging-technologies-to-align-realograms-and-planograms-for-grocery/?sh=506b8b78e86c
+
+
+ # https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
+ # http://devdoc.net/linux/OpenCV-3.2.0/da/d0c/tutorial_bounding_rects_circles.html
+ # https://stackoverflow.com/questions/10297713/find-contour-of-the-set-of-points-in-opencv
+ # https://stackoverflow.com/questions/16538774/dealing-with-contours-and-bounding-rectangle-in-opencv-2-4-python-2-7
+ # https://stackoverflow.com/questions/50308055/creating-bounding-boxes-for-contours
+ # https://stackoverflow.com/questions/57296398/how-can-i-get-better-results-of-bounding-box-using-find-contours-of-opencv
+ # http://amroamroamro.github.io/mexopencv/opencv/generalContours_demo1.html
+ # https://gist.github.com/bigsnarfdude/d811e31ee17495f82f10db12651ae82d
+ # http://man.hubwiz.com/docset/OpenCV.docset/Contents/Resources/Documents/da/d0c/tutorial_bounding_rects_circles.html
+ # https://www.analyticsvidhya.com/blog/2021/05/document-layout-detection-and-ocr-with-detectron2/
+ # https://colab.research.google.com/drive/1m6gaQF6Q4M0IaSjoo_4jWllKJjK-i6fw?usp=sharing#scrollTo=lEyl3wYKHAe1
+ # https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
+ # https://docs.opencv.org/2.4/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html
+ # https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/
+
+
+ def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
+     # Group boxes into horizontal "lines": starting from the top-most box,
+     # every following box whose ymin lies above the current box's ymax is
+     # assigned the same line_number.
+     df["line_number"] = 0
+     colmn.append("line_number")
+     array_value = df[colmn].values
+     start_index = Line_counter = counter = 0
+     ymax, ymin, line_no = (
+         colmn.index(ymax_col),
+         colmn.index(ymin_col),
+         colmn.index("line_number"),
+     )
+     while counter < len(array_value):
+         current_ymax = array_value[start_index][ymax]
+         for next_index in range(start_index, len(array_value)):
+             counter += 1
+
+             next_ymin = array_value[next_index][ymin]
+             next_ymax = array_value[next_index][ymax]
+             if current_ymax > next_ymin:
+                 array_value[next_index][line_no] = Line_counter + 1
+                 # if current_ymax < next_ymax:
+                 #     current_ymax = next_ymax
+             else:
+                 counter -= 1
+                 break
+         # print(counter, len(array_value), start_index)
+         start_index = counter
+         Line_counter += 1
+     return pd.DataFrame(array_value, columns=colmn)
+
+
+ def do_sorting(df):
+     # Sort boxes top-to-bottom, assign line numbers via bucket_sort, then
+     # order boxes left-to-right within each line.
+     df.sort_values(["ymin", "xmin"], ascending=True, inplace=True)
+     df["idx"] = df.index
+     if "line_number" in df.columns:
+         print("line number removed")
+         df.drop("line_number", axis=1, inplace=True)
+     req_colns = ["xmin", "ymin", "xmax", "ymax", "idx"]
+     temp_df = df.copy()
+     temp = bucket_sort(temp_df.copy(), req_colns)
+     df = df.merge(temp[["idx", "line_number"]], on="idx")
+     df.sort_values(["line_number", "xmin"], ascending=True, inplace=True)
+     df = df.reset_index(drop=True)
+     return df
+
+
+ def xml_to_csv(xml_file):
+     # https://gist.github.com/rotemtam/88d9a4efae243fc77ed4a0f9917c8f6c
+     xml_list = []
+     # for xml_file in glob.glob(path + '/*.xml'):
+     # https://discuss.streamlit.io/t/unable-to-read-files-using-standard-file-uploader/2258/2
+     tree = ET.parse(xml_file)
+     root = tree.getroot()
+     for member in root.findall("object"):
+         bbx = member.find("bndbox")
+         xmin = int(bbx.find("xmin").text)
+         ymin = int(bbx.find("ymin").text)
+         xmax = int(bbx.find("xmax").text)
+         ymax = int(bbx.find("ymax").text)
+         label = member.find("name").text
+
+         value = (
+             root.find("filename").text,
+             int(root.find("size")[0].text),
+             int(root.find("size")[1].text),
+             label,
+             xmin,
+             ymin,
+             xmax,
+             ymax,
+         )
+         xml_list.append(value)
+     column_name = [
+         "filename",
+         "width",
+         "height",
+         "cls",
+         "xmin",
+         "ymin",
+         "xmax",
+         "ymax",
+     ]
+     xml_df = pd.DataFrame(xml_list, columns=column_name)
+     return xml_df
+
+
+ # def annotate_planogram_compliance(img0, sorted_xml_df, wrong_indexes, target_names):
+ #     # annotator = Annotator(img0, line_width=3, pil=True)
+ #     det = sorted_xml_df[['xmin', 'ymin', 'xmax', 'ymax', 'cls']].values
+ #     # det[:, :4] = scale_coords((640, 640), det[:, :4], img0.shape).round()
+ #     for i, (*xyxy, cls) in enumerate(det):
+ #         c = int(cls)  # integer class
+ #         if i in wrong_indexes:
+ #             # print(xyxy, "Wrong detection", (255, 0, 0))
+ #             label = "Wrong detection"
+ #             color = (0, 0, 255)
+ #         else:
+ #             # print(xyxy, label, (0, 255, 0))
+ #             label = f'{target_names[c]}'
+ #             color = (0, 255, 0)
+ #         org = (int(xyxy[0]), int(xyxy[1]))
+ #         top_left = org
+ #         bottom_right = (int(xyxy[2]), int(xyxy[3]))
+ #         # print("#" * 50)
+ #         # print(f"Annotating cv2 rectangle with shape: {img0.shape}, top left: {top_left}, bottom right: {bottom_right}, color: {color}, thickness: 3, cv2.LINE_8")
+ #         # print("#" * 50)
+ #         cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
+ #         cv2.putText(img0, label, tuple(org), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color)
+ #     return img0
+
+
+ def annotate_planogram_compliance(
+     img0, sorted_df, correct_indexes, wrong_indexes, target_names
+ ):
+     # Draw green boxes around correctly placed products and red boxes around
+     # misplaced ones. correct_indexes / wrong_indexes are np.where-style
+     # (row, column) pairs addressing line_number and position within a line.
+     # annotator = Annotator(img0, line_width=3, pil=True)
+     det = sorted_df[["xmin", "ymin", "xmax", "ymax", "cls"]].values
+     # det[:, :4] = scale_coords((640, 640), det[:, :4], img0.shape).round()
+     for x, y in zip(*correct_indexes):
+         try:
+             row = sorted_df[sorted_df["line_number"] == x + 1].iloc[y]
+             xyxy = row[["xmin", "ymin", "xmax", "ymax"]].values
+             label = f'{target_names[row["cls"]]}'
+             color = (0, 255, 0)
+             # org = (int(xyxy[0]), int(xyxy[1]))
+             top_left = (int(row["xmin"]), int(row["ymin"]))
+             bottom_right = (int(row["xmax"]), int(row["ymax"]))
+             cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
+
+             cv2.putText(
+                 img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color
+             )
+         except Exception as e:
+             print("Error: " + str(e))
+             continue
+
+     for x, y in zip(*wrong_indexes):
+         try:
+             row = sorted_df[sorted_df["line_number"] == x + 1].iloc[y]
+             xyxy = row[["xmin", "ymin", "xmax", "ymax"]].values
+             label = f'{target_names[row["cls"]]}'
+             color = (0, 0, 255)
+             # org = (int(xyxy[0]), int(xyxy[1]))
+             top_left = (int(row["xmin"]), int(row["ymin"]))  # cast to int for cv2
+             bottom_right = (int(row["xmax"]), int(row["ymax"]))
+             cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
+
+             cv2.putText(
+                 img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color
+             )
+         except Exception as e:
+             print("Error: " + str(e))
+             continue
+
+     return img0
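
A minimal usage sketch for these helpers, assuming a Pascal VOC XML annotation file and a shelf photo. The file names, the index pairs, and the target_names mapping below are hypothetical placeholders, not part of the commit:

import cv2
from app_utils import annotate_planogram_compliance, do_sorting, xml_to_csv

df = xml_to_csv("shelf_annotation.xml")  # hypothetical VOC-style annotation file
sorted_df = do_sorting(df)  # adds line_number; rows ordered top-to-bottom, left-to-right
img = cv2.imread("shelf.jpg")  # hypothetical image path

# np.where-style (row, column) pairs: line 1 slots 1-2 correct, line 2 slot 1 wrong
correct_indexes = ([0, 0], [0, 1])
wrong_indexes = ([1], [0])
target_names = {"product_a": "Product A"}  # assumed label -> display-name map

annotated = annotate_planogram_compliance(
    img, sorted_df, correct_indexes, wrong_indexes, target_names
)
cv2.imwrite("shelf_annotated.jpg", annotated)
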
inference.py ADDED
@@ -0,0 +1,226 @@
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+ """
+ Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
+
+ Usage - sources:
+     $ python detect.py --weights yolov5s.pt --source 0                               # webcam
+                                                      img.jpg                         # image
+                                                      vid.mp4                         # video
+                                                      screen                          # screenshot
+                                                      path/                           # directory
+                                                      list.txt                        # list of images
+                                                      list.streams                    # list of streams
+                                                      'path/*.jpg'                    # glob
+                                                      'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                      'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
+
+ Usage - formats:
+     $ python detect.py --weights yolov5s.pt                 # PyTorch
+                                  yolov5s.torchscript        # TorchScript
+                                  yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
+                                  yolov5s_openvino_model     # OpenVINO
+                                  yolov5s.engine             # TensorRT
+                                  yolov5s.mlmodel            # CoreML (macOS-only)
+                                  yolov5s_saved_model        # TensorFlow SavedModel
+                                  yolov5s.pb                 # TensorFlow GraphDef
+                                  yolov5s.tflite             # TensorFlow Lite
+                                  yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
+                                  yolov5s_paddle_model       # PaddlePaddle
+ """
+
+ import argparse
+ import os
+ import platform
+ import sys
+ from pathlib import Path
+
+ import torch
+
+ FILE = Path(__file__).resolve()
+ ROOT = FILE.parents[0]  # YOLOv5 root directory
+ if str(ROOT) not in sys.path:
+     sys.path.append(str(ROOT))  # add ROOT to PATH
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+ from models.common import DetectMultiBackend
+ from utils.dataloaders import (
+     IMG_FORMATS,
+     VID_FORMATS,
+     LoadImages,
+     LoadScreenshots,
+     LoadStreams,
+ )
+ from utils.general import (
+     LOGGER,
+     Profile,
+     check_file,
+     check_img_size,
+     check_imshow,
+     check_requirements,
+     colorstr,
+     cv2,
+     increment_path,
+     non_max_suppression,
+     print_args,
+     scale_boxes,
+     strip_optimizer,
+     xyxy2xywh,
+ )
+ from utils.plots import Annotator, colors, save_one_box
+ from utils.torch_utils import select_device, smart_inference_mode
+
+
+ @smart_inference_mode()
+ def run(
+     weights=ROOT / "yolov5s.pt",  # model path or triton URL
+     source=ROOT / "data/images",  # file/dir/URL/glob/screen/0(webcam)
+     data=ROOT / "data/coco128.yaml",  # dataset.yaml path
+     imgsz=(640, 640),  # inference size (height, width)
+     conf_thres=0.25,  # confidence threshold
+     iou_thres=0.45,  # NMS IOU threshold
+     max_det=1000,  # maximum detections per image
+     device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+     view_img=False,  # show results
+     save_txt=False,  # save results to *.txt
+     save_conf=False,  # save confidences in --save-txt labels
+     save_crop=False,  # save cropped prediction boxes
+     nosave=False,  # do not save images/videos
+     classes=None,  # filter by class: --class 0, or --class 0 2 3
+     agnostic_nms=False,  # class-agnostic NMS
+     augment=False,  # augmented inference
+     visualize=False,  # visualize features
+     update=False,  # update all models
+     project=ROOT / "runs/detect",  # save results to project/name
+     name="exp",  # save results to project/name
+     exist_ok=False,  # existing project/name ok, do not increment
+     line_thickness=3,  # bounding box thickness (pixels)
+     hide_labels=False,  # hide labels
+     hide_conf=False,  # hide confidences
+     half=False,  # use FP16 half-precision inference
+     dnn=False,  # use OpenCV DNN for ONNX inference
+     vid_stride=1,  # video frame-rate stride
+ ):
+     source = str(source)
+     save_img = not nosave and not source.endswith(".txt")  # save inference images
+     is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+     is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
+     webcam = (
+         source.isnumeric()
+         or source.endswith(".streams")
+         or (is_url and not is_file)
+     )
+     screenshot = source.lower().startswith("screen")
+     if is_url and is_file:
+         source = check_file(source)  # download
+
+     # Directories
+     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
+     (save_dir / "labels" if save_txt else save_dir).mkdir(
+         parents=True, exist_ok=True
+     )  # make dir
+
+     # Load model
+     device = select_device(device)
+     model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
+     stride, names, pt = model.stride, model.names, model.pt
+     imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+     # Dataloader
+     bs = 1  # batch_size
+     if webcam:
+         view_img = check_imshow(warn=True)
+         dataset = LoadStreams(
+             source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride
+         )
+         bs = len(dataset)
+     elif screenshot:
+         dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
+     else:
+         dataset = LoadImages(
+             source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride
+         )
+     vid_path, vid_writer = [None] * bs, [None] * bs
+
+     # Run inference
+     model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
+     seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
+     results = []  # accumulate (path, det) pairs across all images
+     for path, im, im0s, vid_cap, s in dataset:
+         with dt[0]:
+             im = torch.from_numpy(im).to(model.device)
+             im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+             im /= 255  # 0 - 255 to 0.0 - 1.0
+             if len(im.shape) == 3:
+                 im = im[None]  # expand for batch dim
+
+         # Inference
+         with dt[1]:
+             visualize = (
+                 increment_path(save_dir / Path(path).stem, mkdir=True)
+                 if visualize
+                 else False
+             )
+             pred = model(im, augment=augment, visualize=visualize)
+
+         # NMS
+         with dt[2]:
+             pred = non_max_suppression(
+                 pred,
+                 conf_thres,
+                 iou_thres,
+                 classes,
+                 agnostic_nms,
+                 max_det=max_det,
+             )
+
+         # Second-stage classifier (optional)
+         # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
+
+         # Process predictions
+         for i, det in enumerate(pred):  # per image
+             seen += 1
+             if webcam:  # batch_size >= 1
+                 p, im0, frame = path[i], im0s[i].copy(), dataset.count
+                 s += f"{i}: "
+             else:
+                 p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
+
+             p = Path(p)  # to Path
+             save_path = str(save_dir / p.name)  # im.jpg
+             txt_path = str(save_dir / "labels" / p.stem) + (
+                 "" if dataset.mode == "image" else f"_{frame}"
+             )  # im.txt
+             s += "%gx%g " % im.shape[2:]  # print string
+             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
+             imc = im0.copy() if save_crop else im0  # for save_crop
+             annotator = Annotator(
+                 im0, line_width=line_thickness, example=str(names)
+             )
+             if len(det):
+                 # Rescale boxes from img_size to im0 size
+                 det[:, :4] = scale_boxes(
+                     im.shape[2:], det[:, :4], im0.shape
+                 ).round()
+                 results.append((path, det))
+
+     return results
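
Since run() returns the scaled detections instead of writing annotated media, here is a minimal sketch of calling it from Python. The weights and source paths are hypothetical placeholders:

from inference import run

results = run(
    weights="weights/planogram_yolov5.pt",  # hypothetical custom checkpoint
    source="data/shelf_images",  # hypothetical directory of shelf photos
    conf_thres=0.25,
    iou_thres=0.45,
)
for path, det in results:
    # det rows: [xmin, ymin, xmax, ymax, confidence, class]
    print(path, det.shape)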