piyushgrover committed on
Commit
5bfab10
1 Parent(s): 96d483e

added space app files

README.md CHANGED
@@ -1,13 +1,2 @@
1
- ---
2
- title: Yolov3
3
- emoji: 📊
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 3.40.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # yolov3
2
+ S13 ERA V1
app.py ADDED
@@ -0,0 +1,292 @@
1
+ import gradio as gr
2
+ from typing import List
3
+ import cv2
4
+ import torch
5
+ from torchvision import transforms
6
+ import numpy as np
7
+ from PIL import Image
8
+ from pytorch_grad_cam import GradCAM
9
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
10
+ from pytorch_grad_cam.utils.image import show_cam_on_image
11
+ import io
12
+ from models import YoloV3Lightning
13
+ from utils import load_model_from_checkpoint
14
+ import utils
15
+ import config as cfg
16
+ import matplotlib.pyplot as plt
17
+ import matplotlib.patches as patches
18
+ from dataset import YOLODataset
19
+ from torch.utils.data import Dataset, DataLoader
20
+ from grad_cam import YoloGradCAM
21
+
22
+ device = torch.device('cpu')
23
+ dataset_mean, dataset_std = (0.4914, 0.4822, 0.4465), \
24
+ (0.2470, 0.2435, 0.2616)
25
+ model = YoloV3Lightning.YOLOv3LightningModel(num_classes=cfg.NUM_CLASSES, anchors=cfg.ANCHORS, S=cfg.S)
26
+ ckpt_file = 'ckpt_light.pth'
27
+ checkpoint = load_model_from_checkpoint(device, file_name=ckpt_file)
28
+ model.load_state_dict(checkpoint['model'], strict=False)
29
+
30
+ model.eval()
31
+
32
+ scaled_anchors = (
33
+ torch.tensor(cfg.ANCHORS)
34
+ * torch.tensor(cfg.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
35
+ ).to(model.device)
36
+
37
+ cam = YoloGradCAM(model=model, target_layers=[model.layers[-2]], scaled_anchors=scaled_anchors, use_cuda=False)
38
+ '''cfg.IMG_DIR = cfg.DATASET + "/images/"
39
+ cfg.LABEL_DIR = cfg.DATASET + "/labels/"
40
+ eval_dataset = YOLODataset(
41
+ cfg.DATASET + "/25examples.csv",
42
+ transform=cfg.test_transforms,
43
+ S=[cfg.IMAGE_SIZE // 32, cfg.IMAGE_SIZE // 16, cfg.IMAGE_SIZE // 8],
44
+ img_dir=cfg.IMG_DIR,
45
+ label_dir=cfg.LABEL_DIR,
46
+ anchors=cfg.ANCHORS,
47
+ mosaic=False
48
+ )
49
+ eval_loader = DataLoader(
50
+ dataset=eval_dataset,
51
+ batch_size=cfg.BATCH_SIZE,
52
+ num_workers=cfg.NUM_WORKERS,
53
+ pin_memory=cfg.PIN_MEMORY,
54
+ shuffle=True,
55
+ drop_last=False,
56
+ )
57
+
58
+ scaled_anchors = (
59
+ torch.tensor(cfg.ANCHORS)
60
+ * torch.tensor(cfg.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
61
+ )
62
+ scaled_anchors = scaled_anchors.to(cfg.DEVICE)
63
+
64
+ utils.plot_examples(model, eval_loader, 0.5, 0.6, scaled_anchors)'''
65
+
66
+ sample_images = [
67
+ ['images/000001.jpg'],
68
+ ['images/000002.jpg'],
69
+ ['images/000003.jpg'],
70
+ ['images/000004.jpg'],
71
+ ['images/000005.jpg'],
72
+ ['images/000006.jpg'],
73
+ ['images/000007.jpg'],
74
+ ['images/000008.jpg'],
75
+ ['images/000009.jpg'],
76
+ ['images/000010.jpg'],
77
+ ['images/000011.jpg'],
78
+ ['images/000012.jpg'],
79
+ ['images/000013.jpg'],
80
+ ['images/000014.jpg'],
81
+ ['images/000015.jpg'],
82
+ ['images/000016.jpg'],
83
+ ['images/000017.jpg'],
84
+ ['images/000018.jpg'],
85
+ ['images/000019.jpg'],
86
+ ['images/000020.jpg'],
87
+ ['images/000021.jpg'],
88
+ ['images/000022.jpg'],
89
+ ['images/000023.jpg'],
90
+ ['images/000024.jpg'],
91
+ ['images/000025.jpg']
92
+ ]
93
+
94
+ with gr.Blocks() as app:
95
+ with gr.Row():
96
+
97
+ gr.Markdown(
98
+ """
99
+ # YoloV3 App!
100
+ ## The model is trained on PASCAL-VOC data to predict the following classes -
101
+ """)
102
+
103
+ with gr.Row():
104
+ gr.HTML(
105
+ """
106
+ <table>
107
+ <tr>
108
+ <th>aeroplane</th>
109
+ <th>bicycle</th>
110
+ <th>bird</th>
111
+ <th>boat</th>
112
+ <th>bottle</th>
113
+ <th>bus</th>
114
+ <th>car</th>
115
+ <th>cat</th>
116
+ </tr>
117
+ <tr>
118
+ <th>chair</th>
119
+ <th>cow</th>
120
+ <th>diningtable</th>
121
+ <th>dog</th>
122
+ <th>horse</th>
123
+ <th>motorbike</th>
124
+ <th>person</th>
125
+ <th>pottedplant</th>
126
+ </tr>
127
+ <tr>
128
+ <th>sheep</th>
129
+ <th>sofa</th>
130
+ <th>train</th>
131
+ <th>tvmonitor</th>
132
+ </tr>
133
+
134
+ </table>
135
+ <p>
136
+ <a href='https://github.com/piygr/yolov3/blob/main/models/YoloV3Lightning.py'>Click to see the model architecture / code </a>
137
+
138
+ </p>
139
+ """
140
+ )
141
+ with gr.Row(visible=True) as top_pred_cls_col:
142
+ with gr.Column():
143
+ example_images = gr.Gallery(allow_preview=False, label='Select image ', info='',
144
+ value=[img[0] for img in sample_images], columns=3, rows=2)
145
+
146
+ with gr.Column():
147
+ top_pred_image = gr.Image(label='Upload Image or Select from the gallery')
148
+
149
+ with gr.Row():
150
+ top_class_btn = gr.Button("Submit", variant='primary')
151
+ tc_clear_btn = gr.ClearButton()
152
+
153
+ with gr.Row():
154
+ if_show_grad_cam = gr.Checkbox(value=True, label='Show Class Activation Map (What the model sees)?')
155
+
156
+ # with gr.Row(visible=True) as top_class_output:
157
+ with gr.Row(visible=True) as top_class_output:
158
+ top_class_output_img = gr.Image(interactive=False, label='Prediction Output')
159
+ with gr.Row(visible=True) as top_class_output:
160
+ grad_cam_out = gr.Image(interactive=False, visible=True, label='CAM Outcome')
161
+
162
+
163
+ def show_cam_output(input):
164
+ return {
165
+ grad_cam_out: gr.update(visible=input)
166
+ }
167
+
168
+
169
+ if_show_grad_cam.change(
170
+ show_cam_output,
171
+ if_show_grad_cam,
172
+ grad_cam_out
173
+ )
174
+
175
+
176
+ def clear_data():
177
+ return {
178
+ top_pred_image: None,
179
+ top_class_output_img: None
180
+ }
181
+
182
+
183
+ tc_clear_btn.click(clear_data, None, [top_pred_image, top_class_output_img])
184
+
185
+
186
+ def on_select(evt: gr.SelectData):
187
+ return {
188
+ top_pred_image: sample_images[evt.index][0]
189
+ }
190
+
191
+
192
+ example_images.select(on_select, None, top_pred_image)
193
+
194
+
195
+ def plot_image(image, boxes):
196
+ """Plots predicted bounding boxes on the image"""
197
+ cmap = plt.get_cmap("tab20b")
198
+ class_labels = cfg.PASCAL_CLASSES
199
+ colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
200
+ im = np.array(image)
201
+ height, width, _ = im.shape
202
+
203
+ # Create figure and axes
204
+ fig, ax = plt.subplots(1)
205
+ # Display the image
206
+ ax.imshow(im)
207
+
208
+ # box[0] is x midpoint, box[2] is width
209
+ # box[1] is y midpoint, box[3] is height
210
+
211
+ # Create a Rectangle patch
212
+ for box in boxes:
213
+ assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
214
+ class_pred = box[0]
215
+ box = box[2:]
216
+ upper_left_x = box[0] - box[2] / 2
217
+ upper_left_y = box[1] - box[3] / 2
218
+ rect = patches.Rectangle(
219
+ (upper_left_x * width, upper_left_y * height),
220
+ box[2] * width,
221
+ box[3] * height,
222
+ linewidth=2,
223
+ edgecolor=colors[int(class_pred)],
224
+ facecolor="none",
225
+ )
226
+ # Add the patch to the Axes
227
+ ax.add_patch(rect)
228
+ plt.text(
229
+ upper_left_x * width,
230
+ upper_left_y * height,
231
+ s=class_labels[int(class_pred)],
232
+ color="white",
233
+ verticalalignment="top",
234
+ bbox={"color": colors[int(class_pred)], "pad": 0},
235
+ )
236
+
237
+ plt.savefig('output.png')
238
+ x = plt.show()
239
+
240
+
241
+ def predict(image: np.ndarray, iou_thresh: float = 0.5, thresh: float = 0.4, show_cam: bool = False,
242
+ transparency: float = 0.5) -> List[np.ndarray]:
243
+ with torch.no_grad():
244
+ transformed_image = cfg.grad_cam_transforms(image=image)["image"].unsqueeze(0)
245
+ output = model(transformed_image)
246
+
247
+ bboxes = [[] for _ in range(1)]
248
+ for i in range(3):
249
+ batch_size, A, S, _, _ = output[i].shape
250
+ anchor = scaled_anchors[i]
251
+ boxes_scale_i = utils.cells_to_bboxes(
252
+ output[i], anchor, S=S, is_preds=True
253
+ )
254
+ for idx, (box) in enumerate(boxes_scale_i):
255
+ bboxes[idx] += box
256
+
257
+ nms_boxes = utils.non_max_suppression(
258
+ bboxes[0], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
259
+ )
260
+
261
+ plot_image(image, nms_boxes)
262
+ plotted_img = 'output.png'
263
+ if not show_cam:
264
+ return [plotted_img, None]
265
+
266
+ grayscale_cam = cam(transformed_image)[0, :, :]
267
+ img = cv2.resize(image, (416, 416))
268
+ img = np.float32(img) / 255
269
+ cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True, image_weight=transparency)
270
+ return [plotted_img, cam_image]
271
+
272
+
273
+ def top_class_img_upload(input_img, if_cam):
274
+ if input_img is not None:
275
+ imgs = predict(input_img, show_cam=if_cam)
276
+
277
+ return {
278
+ top_class_output_img: imgs[0],
279
+ grad_cam_out: imgs[1]
280
+ }
281
+
282
+
283
+ top_class_btn.click(
284
+ top_class_img_upload,
285
+ [top_pred_image, if_show_grad_cam],
286
+ [top_class_output_img, grad_cam_out]
287
+ )
288
+
289
+ '''
290
+ Launch the app
291
+ '''
292
+ app.launch()
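For context on the `scaled_anchors` computation in `app.py` above: the anchors in `config.py` are normalized to the image, and multiplying by the grid sizes `S` expresses them in grid-cell units. A minimal standalone sketch (not part of the committed files; values taken from `config.py`):

```python
import torch

# Anchors normalized to the image size, as in config.py
ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]
S = [13, 26, 52]  # grid sizes for the three prediction scales (416 // 32, // 16, // 8)

# (3, 3, 2) anchor tensor times a broadcast (3, 3, 2) grid-size tensor -> anchors in cell units
scaled_anchors = torch.tensor(ANCHORS) * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)

print(scaled_anchors.shape)   # torch.Size([3, 3, 2])
print(scaled_anchors[0, 0])   # tensor([3.6400, 2.8600]), i.e. 0.28 * 13 and 0.22 * 13
```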
config.py ADDED
@@ -0,0 +1,113 @@
1
+ import albumentations as A
2
+ import cv2
3
+ import torch
4
+
5
+ from albumentations.pytorch import ToTensorV2
6
+
7
+ SAMPLE_DATASET = '../../input/d/piygro/sample-pascal/PASCAL_VOC'
8
+ DATASET = 'PASCAL_VOC'
9
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
+ # seed_everything() # If you want deterministic behavior
11
+ NUM_WORKERS = 2
12
+ BATCH_SIZE = 16
13
+ IMAGE_SIZE = 416
14
+ NUM_CLASSES = 20
15
+ LEARNING_RATE = 1e-3
16
+ WEIGHT_DECAY = 1e-4
17
+ NUM_EPOCHS = 40
18
+ CONF_THRESHOLD = 0.05
19
+ MAP_IOU_THRESH = 0.5
20
+ NMS_IOU_THRESH = 0.45
21
+ S = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]
22
+ PIN_MEMORY = True
23
+ LOAD_MODEL = False
24
+ SAVE_MODEL = True
25
+ CHECKPOINT_FILE = "checkpoint.ckpt.tar"
26
+ IMG_DIR = DATASET + "/images/"
27
+ LABEL_DIR = DATASET + "/labels/"
28
+
29
+ ANCHORS = [
30
+ [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
31
+ [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
32
+ [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
33
+ ] # Note these have been rescaled to be between [0, 1]
34
+
35
+ means = [0.485, 0.456, 0.406]
36
+
37
+ scale = 1.1
38
+ train_transforms = A.Compose(
39
+ [
40
+ A.LongestMaxSize(max_size=int(IMAGE_SIZE * scale)),
41
+ A.PadIfNeeded(
42
+ min_height=int(IMAGE_SIZE * scale),
43
+ min_width=int(IMAGE_SIZE * scale),
44
+ border_mode=cv2.BORDER_CONSTANT,
45
+ ),
46
+ A.Rotate(limit = 10, interpolation=1, border_mode=4),
47
+ A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
48
+ A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
49
+ A.OneOf(
50
+ [
51
+ A.ShiftScaleRotate(
52
+ rotate_limit=20, p=0.5, border_mode=cv2.BORDER_CONSTANT
53
+ ),
54
+ # A.Affine(shear=15, p=0.5, mode="constant"),
55
+ ],
56
+ p=1.0,
57
+ ),
58
+ A.HorizontalFlip(p=0.5),
59
+ A.Blur(p=0.1),
60
+ A.CLAHE(p=0.1),
61
+ A.Posterize(p=0.1),
62
+ A.ToGray(p=0.1),
63
+ A.ChannelShuffle(p=0.05),
64
+ A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
65
+ ToTensorV2(),
66
+ ],
67
+ bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[],),
68
+ )
69
+ test_transforms = A.Compose(
70
+ [
71
+ A.LongestMaxSize(max_size=IMAGE_SIZE),
72
+ A.PadIfNeeded(
73
+ min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
74
+ ),
75
+ A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
76
+ ToTensorV2(),
77
+ ],
78
+ bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
79
+ )
80
+
81
+ grad_cam_transforms = A.Compose(
82
+ [
83
+ A.LongestMaxSize(max_size=IMAGE_SIZE),
84
+ A.PadIfNeeded(
85
+ min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
86
+ ),
87
+ A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
88
+ ToTensorV2(),
89
+ ],
90
+ )
91
+
92
+ PASCAL_CLASSES = [
93
+ "aeroplane",
94
+ "bicycle",
95
+ "bird",
96
+ "boat",
97
+ "bottle",
98
+ "bus",
99
+ "car",
100
+ "cat",
101
+ "chair",
102
+ "cow",
103
+ "diningtable",
104
+ "dog",
105
+ "horse",
106
+ "motorbike",
107
+ "person",
108
+ "pottedplant",
109
+ "sheep",
110
+ "sofa",
111
+ "train",
112
+ "tvmonitor"
113
+ ]
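A small sketch (not part of the commit) of how the `test_transforms` pipeline above behaves: a 375×500 dummy image and one YOLO-format box pass through resize, pad, and normalize and come back as a 3×416×416 tensor plus a box rescaled to the padded frame. With `label_fields=[]`, the class id travels as the last element of each box tuple.

```python
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

IMAGE_SIZE = 416
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)

image = np.random.randint(0, 255, (375, 500, 3), dtype=np.uint8)   # dummy H x W x 3 image
bboxes = [(0.5, 0.5, 0.4, 0.3, 14)]                                # (x, y, w, h, class) in YOLO format

out = test_transforms(image=image, bboxes=bboxes)
print(out["image"].shape)   # torch.Size([3, 416, 416])
print(out["bboxes"])        # box rescaled to the padded 416x416 frame
```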
dataset.py ADDED
@@ -0,0 +1,184 @@
1
+ """
2
+ Creates a PyTorch dataset to load the Pascal VOC & MS COCO datasets
3
+ """
4
+ import config as cfg
5
+ import numpy as np
6
+ import os
7
+ import pandas as pd
8
+ import torch
9
+ from utils import xywhn2xyxy, xyxy2xywhn
10
+ import random
11
+
12
+ from PIL import Image, ImageFile
13
+ from torch.utils.data import Dataset, DataLoader
14
+ from utils import (
15
+ cells_to_bboxes,
16
+ iou_width_height as iou,
17
+ non_max_suppression as nms,
18
+ plot_image
19
+ )
20
+
21
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
22
+
23
+
24
+ class YOLODataset(Dataset):
25
+ def __init__(
26
+ self,
27
+ csv_file,
28
+ img_dir,
29
+ label_dir,
30
+ anchors,
31
+ image_size=416,
32
+ S=[13, 26, 52],
33
+ C=20,
34
+ transform=None,
35
+ mosaic=True
36
+ ):
37
+ self.annotations = pd.read_csv(csv_file)
38
+ self.img_dir = img_dir
39
+ self.label_dir = label_dir
40
+ self.image_size = image_size
41
+ self.mosaic_border = [image_size // 2, image_size // 2]
42
+ self.transform = transform
43
+ self.S = S
44
+ self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # for all 3 scales
45
+ self.num_anchors = self.anchors.shape[0]
46
+ self.num_anchors_per_scale = self.num_anchors // 3
47
+ self.C = C
48
+ self.ignore_iou_thresh = 0.5
49
+ self.mosaic = mosaic
50
+
51
+ def __len__(self):
52
+ return len(self.annotations)
53
+
54
+ def load_mosaic(self, index):
55
+ # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
56
+ labels4 = []
57
+ s = self.image_size
58
+ yc, xc = (int(random.uniform(x, 2*s - x)) for x in self.mosaic_border) # mosaic center x, y
59
+ indices = [index] + random.choices(range(len(self)), k=3) # 3 additional image indices
60
+ random.shuffle(indices)
61
+ for i, index in enumerate(indices):
62
+ # Load image
63
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
64
+ bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
65
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
66
+ img = np.array(Image.open(img_path).convert("RGB"))
67
+
68
+ h, w = img.shape[0], img.shape[1]
69
+ labels = np.array(bboxes)
70
+
71
+ # place img in img4
72
+ if i == 0: # top left
73
+ img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
74
+ x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
75
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
76
+ elif i == 1: # top right
77
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
78
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
79
+ elif i == 2: # bottom left
80
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
81
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
82
+ elif i == 3: # bottom right
83
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
84
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
85
+
86
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
87
+ padw = x1a - x1b
88
+ padh = y1a - y1b
89
+
90
+ # Labels
91
+ if labels.size:
92
+ labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh) # normalized xywh to pixel xyxy format
93
+ labels4.append(labels)
94
+
95
+ # Concat/clip labels
96
+ labels4 = np.concatenate(labels4, 0)
97
+ for x in (labels4[:, :-1],):
98
+ np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
99
+ # img4, labels4 = replicate(img4, labels4) # replicate
100
+ labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
101
+ labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
102
+ labels4 = labels4[labels4[:, 2] > 0]
103
+ labels4 = labels4[labels4[:, 3] > 0]
104
+ return img4, labels4
105
+
106
+ def __getitem__(self, index):
107
+
108
+ if self.mosaic and random.random() <= 0.75:
109
+ image, bboxes = self.load_mosaic(index)
110
+ else:
111
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
112
+ bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
113
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
114
+ image = np.array(Image.open(img_path).convert("RGB"))
115
+
116
+ if self.transform:
117
+ augmentations = self.transform(image=image, bboxes=bboxes)
118
+ image = augmentations["image"]
119
+ bboxes = augmentations["bboxes"]
120
+
121
+ # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
122
+ targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
123
+ for box in bboxes:
124
+ iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
125
+ anchor_indices = iou_anchors.argsort(descending=True, dim=0)
126
+ x, y, width, height, class_label = box
127
+ has_anchor = [False] * 3 # each scale should have one anchor
128
+ for anchor_idx in anchor_indices:
129
+ scale_idx = anchor_idx // self.num_anchors_per_scale
130
+ anchor_on_scale = anchor_idx % self.num_anchors_per_scale
131
+ S = self.S[scale_idx]
132
+ i, j = int(S * y), int(S * x) # which cell
133
+ anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
134
+ if not anchor_taken and not has_anchor[scale_idx]:
135
+ targets[scale_idx][anchor_on_scale, i, j, 0] = 1
136
+ x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
137
+ width_cell, height_cell = (
138
+ width * S,
139
+ height * S,
140
+ ) # can be greater than 1 since it's relative to cell
141
+ box_coordinates = torch.tensor(
142
+ [x_cell, y_cell, width_cell, height_cell]
143
+ )
144
+ targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
145
+ targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
146
+ has_anchor[scale_idx] = True
147
+
148
+ elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
149
+ targets[scale_idx][anchor_on_scale, i, j, 0] = -1 # ignore prediction
150
+
151
+ return image, tuple(targets)
152
+
153
+
154
+ def validate_dataset():
155
+ anchors = cfg.ANCHORS
156
+
157
+ transform = cfg.test_transforms
158
+
159
+ dataset = YOLODataset(
160
+ cfg.SAMPLE_DATASET + "/25examples.csv",
161
+ cfg.SAMPLE_DATASET + "/images/",
162
+ cfg.SAMPLE_DATASET + "/labels/",
163
+ S=[13, 26, 52],
164
+ anchors=anchors,
165
+ transform=transform,
166
+ )
167
+ S = [13, 26, 52]
168
+ scaled_anchors = torch.tensor(anchors) / (
169
+ 1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
170
+ )
171
+ loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
172
+ for x, y in loader:
173
+ boxes = []
174
+
175
+ for i in range(y[0].shape[1]):
176
+ anchor = scaled_anchors[i]
177
+ print(anchor.shape)
178
+ print(y[i].shape)
179
+ boxes += cells_to_bboxes(
180
+ y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
181
+ )[0]
182
+ boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
183
+ print(boxes)
184
+ plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
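A worked sketch (not from the commit) of the target-assignment arithmetic in `__getitem__` above: for a box centred at (x, y) = (0.6, 0.3) with width/height (0.2, 0.4) on the S = 13 grid, the responsible cell and the cell-relative coordinates come out as follows.

```python
S = 13
x, y, w, h = 0.6, 0.3, 0.2, 0.4        # normalized YOLO box

i, j = int(S * y), int(S * x)          # responsible cell: row i, column j
x_cell, y_cell = S * x - j, S * y - i  # offsets inside that cell, in [0, 1)
w_cell, h_cell = w * S, h * S          # size in cell units (may exceed 1)

print(i, j)             # 3 7
print(x_cell, y_cell)   # ~0.8 ~0.9
print(w_cell, h_cell)   # 2.6 5.2
```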
grad_cam.py ADDED
@@ -0,0 +1,83 @@
1
+ from typing import List
2
+ import torch
3
+ import numpy as np
4
+ import utils
5
+ from pytorch_grad_cam.base_cam import BaseCAM
6
+ from pytorch_grad_cam.utils import get_2d_projection
7
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
8
+
9
+ class YoloGradCAM(BaseCAM):
10
+ def __init__(self,
11
+ model,
12
+ target_layers,
13
+ scaled_anchors,
14
+ use_cuda=False,
15
+ reshape_transform=None):
16
+ super(YoloGradCAM, self).__init__(model,
17
+ target_layers,
18
+ use_cuda,
19
+ reshape_transform,
20
+ uses_gradients=False)
21
+
22
+ self.scaled_anchors = scaled_anchors
23
+
24
+ def get_cam_image(self,
25
+ input_tensor: torch.Tensor,
26
+ target_layer: torch.nn.Module,
27
+ targets: List[torch.nn.Module],
28
+ activations: torch.Tensor,
29
+ grads: torch.Tensor,
30
+ eigen_smooth: bool = False) -> np.ndarray:
31
+ return get_2d_projection(activations)
32
+
33
+ def forward(self,
34
+ input_tensor: torch.Tensor,
35
+ targets: List[torch.nn.Module],
36
+ eigen_smooth: bool = False) -> np.ndarray:
37
+
38
+ if self.cuda:
39
+ input_tensor = input_tensor.cuda()
40
+
41
+ if self.compute_input_gradient:
42
+ input_tensor = torch.autograd.Variable(input_tensor,
43
+ requires_grad=True)
44
+
45
+ outputs = self.activations_and_grads(input_tensor)
46
+ if targets is None:
47
+ bboxes = [[] for _ in range(1)]
48
+ for i in range(3):
49
+ batch_size, A, S, _, _ = outputs[i].shape
50
+ anchor = self.scaled_anchors[i]
51
+ boxes_scale_i = utils.cells_to_bboxes(
52
+ outputs[i], anchor, S=S, is_preds=True
53
+ )
54
+ for idx, (box) in enumerate(boxes_scale_i):
55
+ bboxes[idx] += box
56
+
57
+ nms_boxes = utils.non_max_suppression(
58
+ bboxes[0], iou_threshold=0.5, threshold=0.4, box_format="midpoint",
59
+ )
60
+ # target_categories = np.argmax(outputs.cpu().data.numpy(), axis=-1)
61
+ target_categories = [box[0] for box in nms_boxes]
62
+ targets = [ClassifierOutputTarget(
63
+ category) for category in target_categories]
64
+
65
+ if self.uses_gradients:
66
+ self.model.zero_grad()
67
+ loss = sum([target(output)
68
+ for target, output in zip(targets, outputs)])
69
+ loss.backward(retain_graph=True)
70
+
71
+ # In most of the saliency attribution papers, the saliency is
72
+ # computed with a single target layer.
73
+ # Commonly it is the last convolutional layer.
74
+ # Here we support passing a list with multiple target layers.
75
+ # It will compute the saliency image for every image,
76
+ # and then aggregate them (with a default mean aggregation).
77
+ # This gives you more flexibility in case you just want to
78
+ # use all conv layers for example, all Batchnorm layers,
79
+ # or something else.
80
+ cam_per_layer = self.compute_cam_per_layer(input_tensor,
81
+ targets,
82
+ eigen_smooth)
83
+ return self.aggregate_multi_layers(cam_per_layer)
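For reference, a minimal driving sketch for `YoloGradCAM`, condensed from `app.py` (not part of the committed files; it assumes the checkpoint `ckpt_light.pth` and the sample images from this commit are present locally):

```python
import cv2
import numpy as np
import torch
import config as cfg
from models import YoloV3Lightning
from utils import load_model_from_checkpoint
from grad_cam import YoloGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image

device = torch.device('cpu')
model = YoloV3Lightning.YOLOv3LightningModel(num_classes=cfg.NUM_CLASSES, anchors=cfg.ANCHORS, S=cfg.S)
model.load_state_dict(load_model_from_checkpoint(device, file_name='ckpt_light.pth')['model'], strict=False)
model.eval()

scaled_anchors = (torch.tensor(cfg.ANCHORS)
                  * torch.tensor(cfg.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2))
cam = YoloGradCAM(model=model, target_layers=[model.layers[-2]],
                  scaled_anchors=scaled_anchors, use_cuda=False)

image = cv2.cvtColor(cv2.imread('images/000001.jpg'), cv2.COLOR_BGR2RGB)
tensor = cfg.grad_cam_transforms(image=image)["image"].unsqueeze(0)

# targets=None, so forward() derives the target classes from the NMS'ed predictions
grayscale_cam = cam(tensor)[0, :, :]
overlay = show_cam_on_image(np.float32(cv2.resize(image, (416, 416))) / 255,
                            grayscale_cam, use_rgb=True, image_weight=0.5)
```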
images/000001.jpg ADDED
images/000002.jpg ADDED
images/000003.jpg ADDED
images/000004.jpg ADDED
images/000005.jpg ADDED
images/000006.jpg ADDED
images/000007.jpg ADDED
images/000008.jpg ADDED
images/000009.jpg ADDED
images/000010.jpg ADDED
images/000011.jpg ADDED
images/000012.jpg ADDED
images/000013.jpg ADDED
images/000014.jpg ADDED
images/000015.jpg ADDED
images/000016.jpg ADDED
images/000017.jpg ADDED
images/000018.jpg ADDED
images/000019.jpg ADDED
images/000020.jpg ADDED
images/000021.jpg ADDED
images/000022.jpg ADDED
images/000023.jpg ADDED
images/000024.jpg ADDED
images/000025.jpg ADDED
loss.py ADDED
@@ -0,0 +1,149 @@
1
+ """
2
+ Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
3
+ the difference from what I can tell is I use CrossEntropy for the classes
4
+ instead of BinaryCrossEntropy.
5
+ """
6
+ import random
7
+ import torch
8
+ import torch.nn as nn
9
+ import pytorch_lightning as pl
10
+ from utils import intersection_over_union
11
+ import config as cfg
12
+
13
+
14
+ class YoloLoss(pl.LightningModule):
15
+ def __init__(self):
16
+ super().__init__()
17
+ self.mse = nn.MSELoss()
18
+ self.bce = nn.BCEWithLogitsLoss()
19
+ self.entropy = nn.CrossEntropyLoss()
20
+ self.sigmoid = nn.Sigmoid()
21
+
22
+ # Constants signifying how much to pay for each respective part of the loss
23
+ self.lambda_class = 1
24
+ self.lambda_noobj = 10
25
+ self.lambda_obj = 1
26
+ self.lambda_box = 10
27
+
28
+ self.scaled_anchors = (
29
+ torch.tensor(cfg.ANCHORS)
30
+ * torch.tensor(cfg.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
31
+ )
32
+
33
+ def forward(self, predictions_list, target_list, **kwargs):
34
+
35
+ anchors_list = kwargs.get('anchor_list', None)  # training/validation steps pass this as anchor_list
36
+ if not anchors_list:
37
+ anchors_list = self.scaled_anchors
38
+
39
+ anchors_list = anchors_list.to(cfg.DEVICE)
40
+
41
+ box_loss = 0.0
42
+ object_loss = 0.0
43
+ no_object_loss = 0.0
44
+ class_loss = 0.0
45
+
46
+ for i in range(3):
47
+ target = target_list[i]
48
+ predictions = predictions_list[i]
49
+ anchors = anchors_list[i]
50
+
51
+ # Check where obj and noobj (we ignore if target == -1)
52
+ obj = target[..., 0] == 1 # in paper this is Iobj_i
53
+ noobj = target[..., 0] == 0 # in paper this is Inoobj_i
54
+
55
+ # ======================= #
56
+ # FOR NO OBJECT LOSS #
57
+ # ======================= #
58
+
59
+ no_object_loss += self.bce(
60
+ (predictions[..., 0:1][noobj]), (target[..., 0:1][noobj]),
61
+ )
62
+
63
+ # ==================== #
64
+ # FOR OBJECT LOSS #
65
+ # ==================== #
66
+
67
+ anchors = anchors.reshape(1, 3, 1, 1, 2)
68
+ box_preds = torch.cat([self.sigmoid(predictions[..., 1:3]), torch.exp(predictions[..., 3:5]) * anchors], dim=-1)
69
+ ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
70
+ object_loss += self.mse(self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj])
71
+
72
+ # ======================== #
73
+ # FOR BOX COORDINATES #
74
+ # ======================== #
75
+
76
+ predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3]) # x,y coordinates
77
+ target[..., 3:5] = torch.log(
78
+ (1e-16 + target[..., 3:5] / anchors)
79
+ ) # width, height coordinates
80
+ box_loss += self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
81
+
82
+ # ================== #
83
+ # FOR CLASS LOSS #
84
+ # ================== #
85
+
86
+ class_loss += self.entropy(
87
+ (predictions[..., 5:][obj]), (target[..., 5][obj].long()),
88
+ )
89
+
90
+ #print("__________________________________")
91
+ #print(self.lambda_box * box_loss)
92
+ #print(self.lambda_obj * object_loss)
93
+ #print(self.lambda_noobj * no_object_loss)
94
+ #print(self.lambda_class * class_loss)
95
+ #print("\n")
96
+
97
+ total_loss = (
98
+ self.lambda_box * box_loss
99
+ + self.lambda_obj * object_loss
100
+ + self.lambda_noobj * no_object_loss
101
+ + self.lambda_class * class_loss
102
+ )
103
+
104
+ if kwargs.get('loss_dict'):
105
+ return dict(class_loss=self.lambda_class * class_loss,
106
+ no_object_loss=self.lambda_noobj * no_object_loss,
107
+ object_loss=self.lambda_obj * object_loss,
108
+ box_loss=self.lambda_box * box_loss,
109
+ total_loss=total_loss
110
+ )
111
+ else:
112
+ return total_loss
113
+
114
+
115
+ def check_class_accuracy(self, predictions, target, threshold):
116
+ tot_class_preds, correct_class = 0, 0
117
+ tot_noobj, correct_noobj = 0, 0
118
+ tot_obj, correct_obj = 0, 0
119
+
120
+ y = target
121
+ out = predictions
122
+
123
+ for i in range(3):
124
+ obj = y[i][..., 0] == 1 # in paper this is Iobj_i
125
+ noobj = y[i][..., 0] == 0 # in paper this is Inoobj_i
126
+
127
+ correct_class += torch.sum(
128
+ torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
129
+ )
130
+ tot_class_preds += torch.sum(obj)
131
+
132
+ obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
133
+ correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
134
+ tot_obj += torch.sum(obj)
135
+ correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
136
+ tot_noobj += torch.sum(noobj)
137
+
138
+ return dict(
139
+ correct_class=correct_class,
140
+ correct_noobj=correct_noobj,
141
+ correct_obj=correct_obj,
142
+ total_class_preds=tot_class_preds,
143
+ total_noobj=tot_noobj,
144
+ total_obj=tot_obj
145
+ )
146
+
147
+ '''print(f"Class accuracy is: {(correct_class/(tot_class_preds+1e-16))*100:2f}%")
148
+ print(f"No obj accuracy is: {(correct_noobj/(tot_noobj+1e-16))*100:2f}%")
149
+ print(f"Obj accuracy is: {(correct_obj/(tot_obj+1e-16))*100:2f}%")'''
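To make the weighting above concrete, a small sketch (illustrative numbers only, not from the commit) of how the four components combine with the lambdas set in `YoloLoss.__init__`:

```python
# Hypothetical per-batch component values
box_loss, object_loss, no_object_loss, class_loss = 0.8, 0.5, 0.05, 1.2

lambda_box, lambda_obj, lambda_noobj, lambda_class = 10, 1, 10, 1   # as in YoloLoss.__init__

total_loss = (lambda_box * box_loss
              + lambda_obj * object_loss
              + lambda_noobj * no_object_loss
              + lambda_class * class_loss)
print(total_loss)   # 8.0 + 0.5 + 0.5 + 1.2 = 10.2
```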
main.py ADDED
@@ -0,0 +1,86 @@
1
+ from dataset import *
2
+ from models.YoloV3Lightning import *
3
+ import utils
4
+
5
+ def init(model, basic_sanity_check=True, find_max_lr=True, train=True, **kwargs):
6
+ if basic_sanity_check:
7
+ validate_dataset()
8
+ sanity_check(model)
9
+ print("Set basic_sanity_check to False to proceed")
10
+ else:
11
+ if find_max_lr:
12
+ optimizer = kwargs.get('optimizer')
13
+ criterion = kwargs.get('criterion')
14
+ train_loader = kwargs.get('train_loader')
15
+ utils.find_lr(model, optimizer, criterion, train_loader)
16
+ print("Set find_max_lr to False to proceed further")
17
+ else:
18
+
19
+ train_loader = kwargs.get('train_loader')
20
+ val_loader = kwargs.get('test_loader')
21
+
22
+ if train:
23
+ trainer = pl.Trainer(
24
+ precision=16,
25
+ max_epochs=cfg.NUM_EPOCHS,
26
+ accelerator='gpu'
27
+ )
28
+
29
+ cargs = {}
30
+ if cfg.LOAD_MODEL:
31
+ cargs = dict(ckpt_path=cfg.CHECKPOINT_FILE)
32
+
33
+ trainer.fit(model, train_loader, val_loader, **cargs)
34
+ else:
35
+ ckpt_file = kwargs.get('ckpt_file')
36
+ if ckpt_file:
37
+ checkpoint = utils.load_model_from_checkpoint(cfg.DEVICE, file_name=ckpt_file)
38
+ model.load_state_dict(checkpoint['model'], strict=False)
39
+
40
+ #-- Printing samples
41
+ model.to(cfg.DEVICE)
42
+ model.eval()
43
+ cfg.IMG_DIR = cfg.DATASET + "/images/"
44
+ cfg.LABEL_DIR = cfg.DATASET + "/labels/"
45
+ eval_dataset = YOLODataset(
46
+ cfg.DATASET + "/test.csv",
47
+ transform=cfg.test_transforms,
48
+ S=[cfg.IMAGE_SIZE // 32, cfg.IMAGE_SIZE // 16, cfg.IMAGE_SIZE // 8],
49
+ img_dir=cfg.IMG_DIR,
50
+ label_dir=cfg.LABEL_DIR,
51
+ anchors=cfg.ANCHORS,
52
+ mosaic=False
53
+ )
54
+ eval_loader = DataLoader(
55
+ dataset=eval_dataset,
56
+ batch_size=cfg.BATCH_SIZE,
57
+ num_workers=cfg.NUM_WORKERS,
58
+ pin_memory=cfg.PIN_MEMORY,
59
+ shuffle=True,
60
+ drop_last=False,
61
+ )
62
+
63
+ scaled_anchors = (
64
+ torch.tensor(cfg.ANCHORS)
65
+ * torch.tensor(cfg.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
66
+ )
67
+ scaled_anchors = scaled_anchors.to(cfg.DEVICE)
68
+
69
+ utils.plot_examples(model, eval_loader, 0.5, 0.6, scaled_anchors)
70
+
71
+ # -- Printing MAP
72
+ pred_boxes, true_boxes = utils.get_evaluation_bboxes(
73
+ eval_loader,
74
+ model,
75
+ iou_threshold=cfg.NMS_IOU_THRESH,
76
+ anchors=cfg.ANCHORS,
77
+ threshold=cfg.CONF_THRESHOLD,
78
+ )
79
+ mapval = utils.mean_average_precision(
80
+ pred_boxes,
81
+ true_boxes,
82
+ iou_threshold=cfg.MAP_IOU_THRESH,
83
+ box_format="midpoint",
84
+ num_classes=cfg.NUM_CLASSES,
85
+ )
86
+ print(f"MAP: {mapval.item()}")
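A short usage sketch (not part of the commit) for the `init` entry point above, assuming the PASCAL_VOC sample data referenced in `config.py` is available on disk. The first call runs only the dataset and model sanity checks; later runs flip the flags and supply loaders and an optimizer via kwargs.

```python
import config as cfg
from main import init
from models.YoloV3Lightning import YOLOv3LightningModel

model = YOLOv3LightningModel(num_classes=cfg.NUM_CLASSES, anchors=cfg.ANCHORS, S=cfg.S)

# First pass: only validate_dataset() and sanity_check() run
init(model, basic_sanity_check=True)

# Subsequent passes (sketch): set basic_sanity_check=False and pass
# train_loader / test_loader (and optimizer, criterion, train_loader for the LR finder),
# exactly as they are read out of kwargs inside init() above.
```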
models/YoloV3Lightning.py ADDED
@@ -0,0 +1,378 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import pytorch_lightning as pl
4
+ from pytorch_grad_cam import GradCAM
5
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
6
+ from pytorch_grad_cam.utils.image import show_cam_on_image
7
+ import numpy as np
8
+ from torchvision import transforms
9
+
10
+ import matplotlib.pyplot as plt
11
+
12
+ from loss import YoloLoss
13
+ import config as cfg
14
+
15
+ """
16
+ Information about architecture config:
17
+ Tuple is structured by (filters, kernel_size, stride)
18
+ Every conv is a same convolution.
19
+ List is structured by "B" indicating a residual block followed by the number of repeats
20
+ "S" is for scale prediction block and computing the yolo loss
21
+ "U" is for upsampling the feature map and concatenating with a previous layer
22
+ """
23
+ config = [
24
+ (32, 3, 1),
25
+ (64, 3, 2),
26
+ ["B", 1],
27
+ (128, 3, 2),
28
+ ["B", 2],
29
+ (256, 3, 2),
30
+ ["B", 8],
31
+ (512, 3, 2),
32
+ ["B", 8],
33
+ (1024, 3, 2),
34
+ ["B", 4], # To this point is Darknet-53
35
+ (512, 1, 1),
36
+ (1024, 3, 1),
37
+ "S",
38
+ (256, 1, 1),
39
+ "U",
40
+ (256, 1, 1),
41
+ (512, 3, 1),
42
+ "S",
43
+ (128, 1, 1),
44
+ "U",
45
+ (128, 1, 1),
46
+ (256, 3, 1),
47
+ "S",
48
+ ]
49
+
50
+
51
+ class CNNBlock(nn.Module):
52
+ def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
53
+ super().__init__()
54
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
55
+ self.bn = nn.BatchNorm2d(out_channels)
56
+ self.leaky = nn.LeakyReLU(0.1)
57
+ self.use_bn_act = bn_act
58
+
59
+ def forward(self, x):
60
+ if self.use_bn_act:
61
+ return self.leaky(self.bn(self.conv(x)))
62
+ else:
63
+ return self.conv(x)
64
+
65
+
66
+ class ResidualBlock(nn.Module):
67
+ def __init__(self, channels, use_residual=True, num_repeats=1):
68
+ super().__init__()
69
+ self.layers = nn.ModuleList()
70
+ for repeat in range(num_repeats):
71
+ self.layers += [
72
+ nn.Sequential(
73
+ CNNBlock(channels, channels // 2, kernel_size=1),
74
+ CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
75
+ )
76
+ ]
77
+
78
+ self.use_residual = use_residual
79
+ self.num_repeats = num_repeats
80
+
81
+ def forward(self, x):
82
+ for layer in self.layers:
83
+ if self.use_residual:
84
+ x = x + layer(x)
85
+ else:
86
+ x = layer(x)
87
+
88
+ return x
89
+
90
+
91
+ class ScalePrediction(nn.Module):
92
+ def __init__(self, in_channels, num_classes):
93
+ super().__init__()
94
+ self.pred = nn.Sequential(
95
+ CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
96
+ CNNBlock(
97
+ 2 * in_channels, (num_classes + 5) * 3, bn_act=False, kernel_size=1
98
+ ),
99
+ )
100
+ self.num_classes = num_classes
101
+
102
+ def forward(self, x):
103
+ return (
104
+ self.pred(x)
105
+ .reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3])
106
+ .permute(0, 1, 3, 4, 2)
107
+ )
108
+
109
+
110
+ class YOLOv3LightningModel(pl.LightningModule):
111
+ def __init__(self, in_channels=3, num_classes=20, anchors=None, S=None):
112
+ super().__init__()
113
+ self.num_classes = num_classes
114
+ self.in_channels = in_channels
115
+ self.layers = self._create_conv_layers()
116
+ self.anchor_list = (
117
+ torch.tensor(anchors)
118
+ * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
119
+ )
120
+
121
+ self.criterion = YoloLoss()
122
+
123
+ self.metric = dict(
124
+ total_train_steps=0,
125
+ epoch_train_loss=[],
126
+ epoch_train_acc=[],
127
+ epoch_train_steps=0,
128
+ total_val_steps=0,
129
+ epoch_val_loss=[],
130
+ epoch_val_acc=[],
131
+ epoch_val_steps=0,
132
+ train_loss=[],
133
+ val_loss=[],
134
+ train_acc=[],
135
+ val_acc=[]
136
+ )
137
+
138
+ def forward(self, x):
139
+ outputs = [] # for each scale
140
+ route_connections = []
141
+ for layer in self.layers:
142
+ if isinstance(layer, ScalePrediction):
143
+ outputs.append(layer(x))
144
+ continue
145
+
146
+ x = layer(x)
147
+
148
+ if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
149
+ route_connections.append(x)
150
+
151
+ elif isinstance(layer, nn.Upsample):
152
+ x = torch.cat([x, route_connections[-1]], dim=1)
153
+ route_connections.pop()
154
+
155
+ return outputs
156
+
157
+ def _create_conv_layers(self):
158
+ layers = nn.ModuleList()
159
+ in_channels = self.in_channels
160
+
161
+ for module in config:
162
+ if isinstance(module, tuple):
163
+ out_channels, kernel_size, stride = module
164
+ layers.append(
165
+ CNNBlock(
166
+ in_channels,
167
+ out_channels,
168
+ kernel_size=kernel_size,
169
+ stride=stride,
170
+ padding=1 if kernel_size == 3 else 0,
171
+ )
172
+ )
173
+ in_channels = out_channels
174
+
175
+ elif isinstance(module, list):
176
+ num_repeats = module[1]
177
+ layers.append(ResidualBlock(in_channels, num_repeats=num_repeats,))
178
+
179
+ elif isinstance(module, str):
180
+ if module == "S":
181
+ layers += [
182
+ ResidualBlock(in_channels, use_residual=False, num_repeats=1),
183
+ CNNBlock(in_channels, in_channels // 2, kernel_size=1),
184
+ ScalePrediction(in_channels // 2, num_classes=self.num_classes),
185
+ ]
186
+ in_channels = in_channels // 2
187
+
188
+ elif module == "U":
189
+ layers.append(nn.Upsample(scale_factor=2),)
190
+ in_channels = in_channels * 3
191
+
192
+ return layers
193
+
194
+
195
+ def get_layer(self, idx):
196
+ if idx < len(self.layers) and idx >= 0:
197
+ return self.layers[idx]
198
+
199
+
200
+
201
+ def training_step(self, train_batch, batch_idx):
202
+ x, target = train_batch
203
+ output = self.forward(x)
204
+ loss = self.criterion(output, target, loss_dict=True, anchor_list=self.anchor_list)
205
+ acc = self.criterion.check_class_accuracy(output, target, cfg.CONF_THRESHOLD)
206
+
207
+ self.metric['total_train_steps'] += 1
208
+ self.metric['epoch_train_steps'] += 1
209
+ self.metric['epoch_train_loss'].append(loss)
210
+ self.metric['epoch_train_acc'].append(acc)
211
+
212
+ self.log_dict({'train_loss': loss['total_loss']})
213
+
214
+ return loss['total_loss']
215
+
216
+
217
+ def validation_step(self, val_batch, batch_idx):
218
+ x, target = val_batch
219
+ output = self.forward(x)
220
+ loss = self.criterion(output, target, loss_dict=True, anchor_list=self.anchor_list)
221
+ acc = self.criterion.check_class_accuracy(output, target, cfg.CONF_THRESHOLD)
222
+
223
+ self.metric['total_val_steps'] += 1
224
+ self.metric['epoch_val_steps'] += 1
225
+ self.metric['epoch_val_loss'].append(loss)
226
+ self.metric['epoch_val_acc'].append(acc)
227
+
228
+ self.log_dict({'val_loss': loss['total_loss']})
229
+
230
+
231
+ def on_validation_epoch_end(self):
232
+ if self.metric['total_train_steps']:
233
+ print('Epoch ', self.current_epoch)
234
+ epoch_loss = 0
235
+ epoch_acc = dict(
236
+ correct_class=0,
237
+ correct_noobj=0,
238
+ correct_obj=0,
239
+ total_class_preds=0,
240
+ total_noobj=0,
241
+ total_obj=0
242
+ )
243
+ for i in range(self.metric['epoch_train_steps']):
244
+ lo = self.metric['epoch_train_loss'][i]
245
+ epoch_loss += lo['total_loss']
246
+ acc = self.metric['epoch_train_acc'][i]
247
+ epoch_acc['correct_class'] += acc['correct_class']
248
+ epoch_acc['correct_noobj'] += acc['correct_noobj']
249
+ epoch_acc['correct_obj'] += acc['correct_obj']
250
+ epoch_acc['total_class_preds'] += acc['total_class_preds']
251
+ epoch_acc['total_noobj'] += acc['total_noobj']
252
+ epoch_acc['total_obj'] += acc['total_obj']
253
+
254
+
255
+ print("Train -")
256
+ print(f"Class accuracy is: {(epoch_acc['correct_class']/(epoch_acc['total_class_preds']+1e-16))*100:2f}%")
257
+ print(f"No obj accuracy is: {(epoch_acc['correct_noobj']/(epoch_acc['total_noobj']+1e-16))*100:2f}%")
258
+ print(f"Obj accuracy is: {(epoch_acc['correct_obj']/(epoch_acc['total_obj']+1e-16))*100:2f}%")
259
+ print(f"Total loss: {(epoch_loss/(len(self.metric['epoch_train_loss'])+1e-16)):2f}")
260
+
261
+ self.metric['epoch_train_loss'] = []
262
+ self.metric['epoch_train_acc'] = []
263
+ self.metric['epoch_train_steps'] = 0
264
+
265
+ #---
266
+ epoch_loss = 0
267
+ epoch_acc = dict(
268
+ correct_class=0,
269
+ correct_noobj=0,
270
+ correct_obj=0,
271
+ total_class_preds=0,
272
+ total_noobj=0,
273
+ total_obj=0
274
+ )
275
+ for i in range(self.metric['epoch_val_steps']):
276
+ lo = self.metric['epoch_val_loss'][i]
277
+ epoch_loss += lo['total_loss']
278
+ acc = self.metric['epoch_val_acc'][i]
279
+ epoch_acc['correct_class'] += acc['correct_class']
280
+ epoch_acc['correct_noobj'] += acc['correct_noobj']
281
+ epoch_acc['correct_obj'] += acc['correct_obj']
282
+ epoch_acc['total_class_preds'] += acc['total_class_preds']
283
+ epoch_acc['total_noobj'] += acc['total_noobj']
284
+ epoch_acc['total_obj'] += acc['total_obj']
285
+
286
+ print("Validation -")
287
+ print(f"Class accuracy is: {(epoch_acc['correct_class']/(epoch_acc['total_class_preds']+1e-16))*100:2f}%")
288
+ print(f"No obj accuracy is: {(epoch_acc['correct_noobj']/(epoch_acc['total_noobj']+1e-16))*100:2f}%")
289
+ print(f"Obj accuracy is: {(epoch_acc['correct_obj']/(epoch_acc['total_obj']+1e-16))*100:2f}%")
290
+ print(f"Total loss: {(epoch_loss/(len(self.metric['epoch_val_loss'])+1e-16)):2f}")
291
+
292
+ self.metric['epoch_val_loss'] = []
293
+ self.metric['epoch_val_acc'] = []
294
+ self.metric['epoch_val_steps'] = 0
295
+
296
+ print("Creating checkpoint...")
297
+ self.trainer.save_checkpoint(cfg.CHECKPOINT_FILE)
298
+
299
+
300
+ def test_step(self, test_batch, batch_idx):
301
+ self.validation_step(test_batch, batch_idx)
302
+
303
+ def train_dataloader(self):
304
+ if not self.trainer.train_dataloader:
305
+ self.trainer.fit_loop.setup_data()
306
+
307
+ return self.trainer.train_dataloader
308
+
309
+ def configure_optimizers(self):
310
+ optimizer = torch.optim.Adam(self.parameters(), lr=cfg.LEARNING_RATE, weight_decay=cfg.WEIGHT_DECAY)
311
+ scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
312
+ max_lr=cfg.LEARNING_RATE,
313
+ epochs=self.trainer.max_epochs,
314
+ steps_per_epoch=len(self.train_dataloader()),
315
+ pct_start=8 / self.trainer.max_epochs,
316
+ div_factor=100,
317
+ final_div_factor=100,
318
+ three_phase=False,
319
+ verbose=False
320
+ )
321
+ return {
322
+ "optimizer": optimizer,
323
+ "lr_scheduler": {
324
+ "scheduler": scheduler,
325
+ 'interval': 'step', # or 'epoch'
326
+ 'frequency': 1
327
+ },
328
+ }
329
+
330
+
331
+ def plot_grad_cam(self, img, target_layers, grad_opacity=1.0):
332
+ mean, std = [0, 0, 0], [1, 1, 1]
333
+
334
+ cam = GradCAM(model=self, target_layers=target_layers)
335
+
336
+ transform = transforms.ToTensor()
337
+ img = transform(img)
338
+
339
+ if self.device != img.device:
340
+ img = img.to(self.device)
341
+
342
+ x = img.unsqueeze(0)
343
+
344
+ out = self.forward(img)
345
+ bboxes = []
346
+ #fig = plt.figure()
347
+ for i in range(count):
348
+ plt.subplot(int(count / 5), 5, i + 1)
349
+ plt.tight_layout()
350
+
351
+ targets = [ClassifierOutputTarget(pred_dict['ground_truths'][i].cpu().item())]
352
+
353
+ grayscale_cam = cam(input_tensor=pred_dict['images'][i][None, :].cpu(), targets=targets)
354
+
355
+ x = denormalize(pred_dict['images'][i].cpu(), mean, std)
356
+
357
+ image = np.array(255 * x, np.int16).transpose(1, 2, 0)
358
+ img_tensor = np.array(x, np.float16).transpose(1, 2, 0)
359
+
360
+ visualization = show_cam_on_image(img_tensor, grayscale_cam.transpose(1, 2, 0), use_rgb=True,
361
+ image_weight=(1.0 - grad_opacity) )
362
+
363
+ plt.imshow(image, vmin=0, vmax=255)
364
+ plt.imshow(visualization, vmin=0, vmax=255, alpha=grad_opacity)
365
+ plt.xticks([])
366
+ plt.yticks([])
367
+
368
+ title = get_data_label_name(pred_dict['ground_truths'][i].item()) + ' / ' + \
369
+ get_data_label_name(pred_dict['predicted_vals'][i].item())
370
+ plt.title(title, fontsize=8)
371
+
372
+ def sanity_check(model):
373
+ x = torch.randn((2, 3, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE))
374
+ out = model(x)
375
+ assert model(x)[0].shape == (2, 3, cfg.IMAGE_SIZE // 32, cfg.IMAGE_SIZE // 32, cfg.NUM_CLASSES + 5)
376
+ assert model(x)[1].shape == (2, 3, cfg.IMAGE_SIZE // 16, cfg.IMAGE_SIZE // 16, cfg.NUM_CLASSES + 5)
377
+ assert model(x)[2].shape == (2, 3, cfg.IMAGE_SIZE // 8, cfg.IMAGE_SIZE // 8, cfg.NUM_CLASSES + 5)
378
+ print("Success!")
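A minimal sketch (mirroring `sanity_check` above, not part of the committed files) of what the three prediction heads return for a 416×416 input, using the values from `config.py`:

```python
import torch
import config as cfg
from models.YoloV3Lightning import YOLOv3LightningModel

model = YOLOv3LightningModel(num_classes=cfg.NUM_CLASSES, anchors=cfg.ANCHORS, S=cfg.S)
x = torch.randn(2, 3, cfg.IMAGE_SIZE, cfg.IMAGE_SIZE)

with torch.no_grad():
    out = model(x)

for o in out:
    print(o.shape)
# torch.Size([2, 3, 13, 13, 25])   -> (batch, anchors, S, S, num_classes + 5)
# torch.Size([2, 3, 26, 26, 25])
# torch.Size([2, 3, 52, 52, 25])
```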
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ torch
2
+ torch-lr-finder
3
+ torchvision
4
+ pillow
5
+ gradio
6
+ grad-cam
7
+ numpy
8
+ pytorch-lightning
utils.py ADDED
@@ -0,0 +1,625 @@
1
+ from typing import List
2
+
3
+ import cv2
4
+ from torch_lr_finder import LRFinder
5
+
6
+ import config as cfg
7
+ import matplotlib.pyplot as plt
8
+ import matplotlib.patches as patches
9
+ import numpy as np
10
+ import os
11
+ import random
12
+ import torch
13
+
14
+ from collections import Counter
15
+ from torch.utils.data import DataLoader
16
+ from tqdm import tqdm
17
+
18
+
19
+
20
+
21
+ def iou_width_height(boxes1, boxes2):
22
+ """
23
+ Parameters:
24
+ boxes1 (tensor): width and height of the first bounding boxes
25
+ boxes2 (tensor): width and height of the second bounding boxes
26
+ Returns:
27
+ tensor: Intersection over union of the corresponding boxes
28
+ """
29
+ intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
30
+ boxes1[..., 1], boxes2[..., 1]
31
+ )
32
+ union = (
33
+ boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
34
+ )
35
+ return intersection / union
36
+
37
+
38
+ def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
39
+ """
40
+ Video explanation of this function:
41
+ https://youtu.be/XXYG5ZWtjj0
42
+
43
+ This function calculates intersection over union (iou) given pred boxes
44
+ and target boxes.
45
+
46
+ Parameters:
47
+ boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
48
+ boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
49
+ box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
50
+
51
+ Returns:
52
+ tensor: Intersection over union for all examples
53
+ """
54
+
55
+ if box_format == "midpoint":
56
+ box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
57
+ box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
58
+ box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
59
+ box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
60
+ box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
61
+ box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
62
+ box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
63
+ box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
64
+
65
+ if box_format == "corners":
66
+ box1_x1 = boxes_preds[..., 0:1]
67
+ box1_y1 = boxes_preds[..., 1:2]
68
+ box1_x2 = boxes_preds[..., 2:3]
69
+ box1_y2 = boxes_preds[..., 3:4]
70
+ box2_x1 = boxes_labels[..., 0:1]
71
+ box2_y1 = boxes_labels[..., 1:2]
72
+ box2_x2 = boxes_labels[..., 2:3]
73
+ box2_y2 = boxes_labels[..., 3:4]
74
+
75
+ x1 = torch.max(box1_x1, box2_x1)
76
+ y1 = torch.max(box1_y1, box2_y1)
77
+ x2 = torch.min(box1_x2, box2_x2)
78
+ y2 = torch.min(box1_y2, box2_y2)
79
+
80
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
81
+ box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
82
+ box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
83
+
84
+ return intersection / (box1_area + box2_area - intersection + 1e-6)
85
+
86
+
87
+ def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
88
+ """
89
+ Video explanation of this function:
90
+ https://youtu.be/YDkjWEN8jNA
91
+
92
+ Does Non Max Suppression given bboxes
93
+
94
+ Parameters:
95
+ bboxes (list): list of lists containing all bboxes with each bboxes
96
+ specified as [class_pred, prob_score, x1, y1, x2, y2]
97
+ iou_threshold (float): threshold where predicted bboxes is correct
98
+ threshold (float): threshold to remove predicted bboxes (independent of IoU)
99
+ box_format (str): "midpoint" or "corners" used to specify bboxes
100
+
101
+ Returns:
102
+ list: bboxes after performing NMS given a specific IoU threshold
103
+ """
104
+
105
+ assert type(bboxes) == list
106
+
107
+ bboxes = [box for box in bboxes if box[1] > threshold]
108
+ bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
109
+ bboxes_after_nms = []
110
+
111
+ while bboxes:
112
+ chosen_box = bboxes.pop(0)
113
+
114
+ bboxes = [
115
+ box
116
+ for box in bboxes
117
+ if box[0] != chosen_box[0]
118
+ or intersection_over_union(
119
+ torch.tensor(chosen_box[2:]),
120
+ torch.tensor(box[2:]),
121
+ box_format=box_format,
122
+ )
123
+ < iou_threshold
124
+ ]
125
+
126
+ bboxes_after_nms.append(chosen_box)
127
+
128
+ return bboxes_after_nms
129
+
130
+
131
+ def mean_average_precision(
132
+ pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
133
+ ):
134
+ """
135
+ Video explanation of this function:
136
+ https://youtu.be/FppOzcDvaDI
137
+
138
+ This function calculates mean average precision (mAP)
139
+
140
+ Parameters:
141
+ pred_boxes (list): list of lists containing all bboxes with each bboxes
142
+ specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
143
+ true_boxes (list): Similar as pred_boxes except all the correct ones
144
+ iou_threshold (float): threshold where predicted bboxes is correct
145
+ box_format (str): "midpoint" or "corners" used to specify bboxes
146
+ num_classes (int): number of classes
147
+
148
+ Returns:
149
+ float: mAP value across all classes given a specific IoU threshold
150
+ """
151
+
152
+ # list storing all AP for respective classes
153
+ average_precisions = []
154
+
155
+ # used for numerical stability later on
156
+ epsilon = 1e-6
157
+
158
+ for c in range(num_classes):
159
+ detections = []
160
+ ground_truths = []
161
+
162
+ # Go through all predictions and targets,
163
+ # and only add the ones that belong to the
164
+ # current class c
165
+ for detection in pred_boxes:
166
+ if detection[1] == c:
167
+ detections.append(detection)
168
+
169
+ for true_box in true_boxes:
170
+ if true_box[1] == c:
171
+ ground_truths.append(true_box)
172
+
173
+ # find the amount of bboxes for each training example
174
+ # Counter here finds how many ground truth bboxes we get
175
+ # for each training example, so let's say img 0 has 3,
176
+ # img 1 has 5 then we will obtain a dictionary with:
177
+ # amount_bboxes = {0:3, 1:5}
178
+ amount_bboxes = Counter([gt[0] for gt in ground_truths])
179
+
180
+ # We then go through each key, val in this dictionary
181
+ # and convert to the following (w.r.t same example):
182
+ # amount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
183
+ for key, val in amount_bboxes.items():
184
+ amount_bboxes[key] = torch.zeros(val)
185
+
186
+ # sort by box probabilities which is index 2
187
+ detections.sort(key=lambda x: x[2], reverse=True)
188
+ TP = torch.zeros((len(detections)))
189
+ FP = torch.zeros((len(detections)))
190
+ total_true_bboxes = len(ground_truths)
191
+
192
+ # If none exists for this class then we can safely skip
193
+ if total_true_bboxes == 0:
194
+ continue
195
+
196
+ for detection_idx, detection in enumerate(detections):
197
+ # Only take out the ground_truths that have the same
198
+ # training idx as detection
199
+ ground_truth_img = [
200
+ bbox for bbox in ground_truths if bbox[0] == detection[0]
201
+ ]
202
+
203
+ num_gts = len(ground_truth_img)
204
+ best_iou = 0
205
+
206
+ for idx, gt in enumerate(ground_truth_img):
207
+ iou = intersection_over_union(
208
+ torch.tensor(detection[3:]),
209
+ torch.tensor(gt[3:]),
210
+ box_format=box_format,
211
+ )
212
+
213
+ if iou > best_iou:
214
+ best_iou = iou
215
+ best_gt_idx = idx
216
+
217
+ if best_iou > iou_threshold:
218
+ # only detect ground truth detection once
219
+ if amount_bboxes[detection[0]][best_gt_idx] == 0:
220
+ # true positive and add this bounding box to seen
221
+ TP[detection_idx] = 1
222
+ amount_bboxes[detection[0]][best_gt_idx] = 1
223
+ else:
224
+ FP[detection_idx] = 1
225
+
226
+ # if IOU is lower then the detection is a false positive
227
+ else:
228
+ FP[detection_idx] = 1
229
+
230
+ TP_cumsum = torch.cumsum(TP, dim=0)
231
+ FP_cumsum = torch.cumsum(FP, dim=0)
232
+ recalls = TP_cumsum / (total_true_bboxes + epsilon)
233
+ precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
234
+ precisions = torch.cat((torch.tensor([1]), precisions))
235
+ recalls = torch.cat((torch.tensor([0]), recalls))
236
+ # torch.trapz for numerical integration
237
+ average_precisions.append(torch.trapz(precisions, recalls))
238
+
239
+ return sum(average_precisions) / len(average_precisions)
240
+
241
+
242
+ def plot_image(image, boxes):
243
+ """Plots predicted bounding boxes on the image"""
244
+ cmap = plt.get_cmap("tab20b")
245
+ class_labels = cfg.COCO_LABELS if cfg.DATASET=='COCO' else cfg.PASCAL_CLASSES
246
+ colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
247
+ im = np.array(image)
248
+ height, width, _ = im.shape
249
+
250
+ # Create figure and axes
251
+ fig, ax = plt.subplots(1)
252
+ # Display the image
253
+ ax.imshow(im)
254
+
255
+ # box[0] is x midpoint, box[2] is width
256
+ # box[1] is y midpoint, box[3] is height
257
+
258
+ # Create a Rectangle patch
259
+ for box in boxes:
260
+ assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
261
+ class_pred = box[0]
262
+ box = box[2:]
263
+ upper_left_x = box[0] - box[2] / 2
264
+ upper_left_y = box[1] - box[3] / 2
265
+ rect = patches.Rectangle(
266
+ (upper_left_x * width, upper_left_y * height),
267
+ box[2] * width,
268
+ box[3] * height,
269
+ linewidth=2,
270
+ edgecolor=colors[int(class_pred)],
271
+ facecolor="none",
272
+ )
273
+ # Add the patch to the Axes
274
+ ax.add_patch(rect)
275
+ plt.text(
276
+ upper_left_x * width,
277
+ upper_left_y * height,
278
+ s=class_labels[int(class_pred)],
279
+ color="white",
280
+ verticalalignment="top",
281
+ bbox={"color": colors[int(class_pred)], "pad": 0},
282
+ )
283
+
284
+ plt.show()
285
+
286
+
+ def get_evaluation_bboxes(
+     loader,
+     model,
+     iou_threshold,
+     anchors,
+     threshold,
+     box_format="midpoint",
+     device="cuda",
+ ):
+     # make sure the model is in eval mode before collecting bboxes
+     model.eval()
+     train_idx = 0
+     all_pred_boxes = []
+     all_true_boxes = []
+     for batch_idx, (x, labels) in enumerate(tqdm(loader)):
+         x = x.to(device)
+
+         with torch.no_grad():
+             predictions = model(x)
+
+         batch_size = x.shape[0]
+         bboxes = [[] for _ in range(batch_size)]
+         for i in range(3):
+             S = predictions[i].shape[2]
+             anchor = torch.tensor([*anchors[i]]).to(device) * S
+             boxes_scale_i = cells_to_bboxes(
+                 predictions[i], anchor, S=S, is_preds=True
+             )
+             for idx, box in enumerate(boxes_scale_i):
+                 bboxes[idx] += box
+
+         # we just want one bbox for each label, not one for each scale
+         true_bboxes = cells_to_bboxes(
+             labels[2], anchor, S=S, is_preds=False
+         )
+
+         for idx in range(batch_size):
+             nms_boxes = non_max_suppression(
+                 bboxes[idx],
+                 iou_threshold=iou_threshold,
+                 threshold=threshold,
+                 box_format=box_format,
+             )
+
+             for nms_box in nms_boxes:
+                 all_pred_boxes.append([train_idx] + nms_box)
+
+             for box in true_bboxes[idx]:
+                 if box[1] > threshold:
+                     all_true_boxes.append([train_idx] + box)
+
+             train_idx += 1
+
+     model.train()
+     return all_pred_boxes, all_true_boxes
+
+
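A hedged usage sketch of how these boxes are typically fed into mean_average_precision; the threshold values and the keyword names of mean_average_precision (defined earlier in this file) are assumptions:

pred_boxes, true_boxes = get_evaluation_bboxes(
    test_loader,
    model,
    iou_threshold=0.45,          # NMS IoU threshold, illustrative value
    anchors=cfg.ANCHORS,
    threshold=0.05,              # confidence threshold, illustrative value
    device=cfg.DEVICE,
)
mapval = mean_average_precision(
    pred_boxes,
    true_boxes,
    iou_threshold=0.5,           # mAP@0.5
    box_format="midpoint",
    num_classes=cfg.NUM_CLASSES,
)
print(f"mAP@0.5: {mapval}")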
+ def cells_to_bboxes(predictions, anchors, S, is_preds=True):
+     """
+     Scales the predictions coming from the model to
+     be relative to the entire image so that they can later,
+     for example, be plotted or evaluated.
+     INPUT:
+     predictions: tensor of size (N, 3, S, S, num_classes+5)
+     anchors: the anchors used for the predictions
+     S: the number of cells the image is divided in on the width (and height)
+     is_preds: whether the input is predictions or the true bounding boxes
+     OUTPUT:
+     converted_bboxes: the converted boxes of size (N, num_anchors * S * S, 6) with class index,
+                       object score, and bounding box coordinates
+     """
+     BATCH_SIZE = predictions.shape[0]
+     num_anchors = len(anchors)
+     box_predictions = predictions[..., 1:5]
+     if is_preds:
+         anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
+         box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
+         box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
+         scores = torch.sigmoid(predictions[..., 0:1])
+         best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
+     else:
+         scores = predictions[..., 0:1]
+         best_class = predictions[..., 5:6]
+
+     cell_indices = (
+         torch.arange(S)
+         .repeat(predictions.shape[0], 3, S, 1)
+         .unsqueeze(-1)
+         .to(predictions.device)
+     )
+     x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
+     y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
+     w_h = 1 / S * box_predictions[..., 2:4]
+     converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(BATCH_SIZE, num_anchors * S * S, 6)
+     return converted_bboxes.tolist()
+
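To make the conversion above concrete, a standalone sketch of the same math for a single predicted box in one cell (all numbers are illustrative):

import torch

S = 13                                     # 13x13 grid at the coarsest scale of a 416x416 input
cell_row, cell_col = 6, 4                  # the cell that made the prediction
tx, ty, tw, th = 0.2, -0.3, 0.1, 0.4       # raw network outputs for the box
anchor_w, anchor_h = 3.6, 2.9              # anchor size already scaled to grid units

x = (torch.sigmoid(torch.tensor(tx)) + cell_col) / S    # center x relative to the whole image
y = (torch.sigmoid(torch.tensor(ty)) + cell_row) / S    # center y relative to the whole image
w = torch.exp(torch.tensor(tw)) * anchor_w / S          # width relative to the whole image
h = torch.exp(torch.tensor(th)) * anchor_h / S          # height relative to the whole image
print(x.item(), y.item(), w.item(), h.item())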
+ def check_class_accuracy(model, loader, threshold):
+     model.eval()
+     tot_class_preds, correct_class = 0, 0
+     tot_noobj, correct_noobj = 0, 0
+     tot_obj, correct_obj = 0, 0
+
+     for idx, (x, y) in enumerate(tqdm(loader)):
+         x = x.to(cfg.DEVICE)
+         with torch.no_grad():
+             out = model(x)
+
+         for i in range(3):
+             y[i] = y[i].to(cfg.DEVICE)
+             obj = y[i][..., 0] == 1  # in the paper this is Iobj_i
+             noobj = y[i][..., 0] == 0  # in the paper this is Inoobj_i
+
+             correct_class += torch.sum(
+                 torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
+             )
+             tot_class_preds += torch.sum(obj)
+
+             obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
+             correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
+             tot_obj += torch.sum(obj)
+             correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
+             tot_noobj += torch.sum(noobj)
+
+     print(f"Class accuracy is: {(correct_class/(tot_class_preds+1e-16))*100:.2f}%")
+     print(f"No obj accuracy is: {(correct_noobj/(tot_noobj+1e-16))*100:.2f}%")
+     print(f"Obj accuracy is: {(correct_obj/(tot_obj+1e-16))*100:.2f}%")
+     model.train()
+
+
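A tiny standalone illustration of the objectness thresholding used above (logits are made up):

import torch

obj_logits = torch.tensor([2.0, -1.0, 0.1])
obj_preds = torch.sigmoid(obj_logits) > 0.5   # tensor([ True, False,  True])
print(obj_preds)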
+ def get_mean_std(loader):
+     # var[X] = E[X**2] - E[X]**2
+     channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
+
+     for data, _ in tqdm(loader):
+         channels_sum += torch.mean(data, dim=[0, 2, 3])
+         channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3])
+         num_batches += 1
+
+     mean = channels_sum / num_batches
+     std = (channels_sqrd_sum / num_batches - mean ** 2) ** 0.5
+
+     return mean, std
+
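A quick numeric check of the identity var[X] = E[X**2] - E[X]**2 that this function relies on:

import torch

x = torch.tensor([1.0, 2.0, 3.0, 4.0])
mean = x.mean()                              # 2.5
var_direct = ((x - mean) ** 2).mean()        # 1.25
var_identity = (x ** 2).mean() - mean ** 2   # 1.25 as well
print(var_direct.item(), var_identity.item())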
+ def get_loaders(train_csv_path, test_csv_path):
+     from dataset import YOLODataset
+
+     IMAGE_SIZE = cfg.IMAGE_SIZE
+     train_dataset = YOLODataset(
+         train_csv_path,
+         transform=cfg.train_transforms,
+         S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
+         img_dir=cfg.IMG_DIR,
+         label_dir=cfg.LABEL_DIR,
+         anchors=cfg.ANCHORS,
+     )
+     test_dataset = YOLODataset(
+         test_csv_path,
+         transform=cfg.test_transforms,
+         S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
+         img_dir=cfg.IMG_DIR,
+         label_dir=cfg.LABEL_DIR,
+         anchors=cfg.ANCHORS,
+     )
+     train_loader = DataLoader(
+         dataset=train_dataset,
+         batch_size=cfg.BATCH_SIZE,
+         num_workers=cfg.NUM_WORKERS,
+         pin_memory=cfg.PIN_MEMORY,
+         shuffle=True,
+         drop_last=False,
+     )
+     test_loader = DataLoader(
+         dataset=test_dataset,
+         batch_size=cfg.BATCH_SIZE,
+         num_workers=cfg.NUM_WORKERS,
+         pin_memory=cfg.PIN_MEMORY,
+         shuffle=False,
+         drop_last=False,
+     )
+
+     train_eval_dataset = YOLODataset(
+         train_csv_path,
+         transform=cfg.test_transforms,
+         S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
+         img_dir=cfg.IMG_DIR,
+         label_dir=cfg.LABEL_DIR,
+         anchors=cfg.ANCHORS,
+     )
+     train_eval_loader = DataLoader(
+         dataset=train_eval_dataset,
+         batch_size=cfg.BATCH_SIZE,
+         num_workers=cfg.NUM_WORKERS,
+         pin_memory=cfg.PIN_MEMORY,
+         shuffle=False,
+         drop_last=False,
+     )
+
+     return train_loader, test_loader, train_eval_loader
+
+
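A hedged usage sketch (the CSV file names are placeholders, not paths taken from this commit):

train_loader, test_loader, train_eval_loader = get_loaders(
    train_csv_path=cfg.DATASET + "/train.csv",
    test_csv_path=cfg.DATASET + "/test.csv",
)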
+ def seed_everything(seed=42):
+     os.environ['PYTHONHASHSEED'] = str(seed)
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+
+ def clip_coords(boxes, img_shape):
+     # Clip xyxy bounding boxes to image shape (height, width)
+     boxes[:, 0].clamp_(0, img_shape[1])  # x1
+     boxes[:, 1].clamp_(0, img_shape[0])  # y1
+     boxes[:, 2].clamp_(0, img_shape[1])  # x2
+     boxes[:, 3].clamp_(0, img_shape[0])  # y2
+
+ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
+     # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
+     y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
+     y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
+     y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
+     return y
+
+
+ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
+     # Convert normalized segments into pixel segments, shape (n,2)
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 0] = w * x[..., 0] + padw  # x
+     y[..., 1] = h * x[..., 1] + padh  # y
+     return y
+
+ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
+     # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
+     if clip:
+         clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
+     y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+     y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
+     y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
+     y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
+     y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
+     return y
+
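A small round-trip check of the two box conversions defined above (box values are made up):

import torch

box_n = torch.tensor([[0.5, 0.5, 0.2, 0.4]])   # normalized [x_center, y_center, w, h]
box_px = xywhn2xyxy(box_n, w=640, h=640)       # tensor([[256., 192., 384., 448.]])
back = xyxy2xywhn(box_px, w=640, h=640)        # recovers [[0.5, 0.5, 0.2, 0.4]]
print(box_px, back)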
+ def clip_boxes(boxes, shape):
+     # Clip boxes (xyxy) to image shape (height, width)
+     if isinstance(boxes, torch.Tensor):  # faster individually
+         boxes[..., 0].clamp_(0, shape[1])  # x1
+         boxes[..., 1].clamp_(0, shape[0])  # y1
+         boxes[..., 2].clamp_(0, shape[1])  # x2
+         boxes[..., 3].clamp_(0, shape[0])  # y2
+     else:  # np.array (faster grouped)
+         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
+         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+
+
+ def find_lr(model, optimizer, criterion, data_loader):
+     lr_finder = LRFinder(model, optimizer, criterion)
+     lr_finder.range_test(data_loader, end_lr=100, num_iter=100)
+     _, best_lr = lr_finder.plot()  # inspect the loss vs. learning-rate graph
+     lr_finder.reset()
+
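LRFinder is presumably the class from the torch-lr-finder package (its import is not visible in this part of the diff, so treat that as an assumption). A hedged usage sketch:

# pip install torch-lr-finder
import torch.nn as nn
import torch.optim as optim
from torch_lr_finder import LRFinder   # assumed source of the LRFinder used above

criterion = nn.CrossEntropyLoss()                     # illustrative loss, not this repo's YOLO loss
optimizer = optim.Adam(model.parameters(), lr=1e-7)   # start from a very small learning rate
find_lr(model, optimizer, criterion, train_loader)    # runs the range test and plots loss vs. lr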
+ def load_model_from_checkpoint(device, file_name='ckpt_light.pth'):
+     checkpoint = torch.load(file_name, map_location=device)
+
+     return checkpoint
+
+ def plot_examples(model, loader, iou_threshold, threshold, anchors):
+
+     print(anchors.device)
+     x, y = next(iter(loader))
+     x = x.to(cfg.DEVICE)
+     out = model(x)
+     bboxes = [[] for _ in range(x.shape[0])]
+     for i in range(3):
+         batch_size, A, S, _, _ = out[i].shape
+         anchor = anchors[i]
+         boxes_scale_i = cells_to_bboxes(
+             out[i], anchor, S=S, is_preds=True
+         )
+         for idx, box in enumerate(boxes_scale_i):
+             bboxes[idx] += box
+
+     for i in range(batch_size // 4):
+         nms_boxes = non_max_suppression(
+             bboxes[i], iou_threshold=iou_threshold, threshold=threshold, box_format="midpoint",
+         )
+         plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
+
+
+ def draw_predictions(image: np.ndarray, boxes: List[List], class_labels: List[str]) -> np.ndarray:
+     """Draws predicted bounding boxes on the image"""
+
+     colors = [[random.randint(0, 255) for _ in range(3)] for name in class_labels]
+
+     im = np.array(image)
+     height, width, _ = im.shape
+     bbox_thick = 2  # int(0.6 * (height + width) / 600)
+
+     # Draw a rectangle and label for each box
+     for box in boxes:
+         assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
+         class_pred = box[0]
+         conf = box[1]
+         box = box[2:]
+         upper_left_x = box[0] - box[2] / 2
+         upper_left_y = box[1] - box[3] / 2
+
+         x1 = int(upper_left_x * width)
+         y1 = int(upper_left_y * height)
+
+         x2 = x1 + int(box[2] * width)
+         y2 = y1 + int(box[3] * height)
+
+         cv2.rectangle(
+             image,
+             (x1, y1), (x2, y2),
+             color=colors[int(class_pred)],
+             thickness=bbox_thick
+         )
+         text = f"{class_labels[int(class_pred)]}"  # append f": {conf:.2f}" to also show confidence
+         t_size = cv2.getTextSize(text, 0, 0.7, thickness=bbox_thick // 2)[0]
+         c3 = (x1 + t_size[0], y1 - t_size[1] - 3)
+
+         cv2.rectangle(image, (x1, y1), c3, colors[int(class_pred)], -1)
+         cv2.putText(
+             image,
+             text,
+             (x1, y1 - 2),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             0.7,
+             (0, 0, 0),
+             bbox_thick // 2,
+             lineType=cv2.LINE_AA,
+         )
+
+     return image
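To show how these utilities compose at inference time, a rough end-to-end sketch; the image path, input size, thresholds, and the plain-resize preprocessing are illustrative assumptions (the actual Gradio wiring lives in app.py):

from PIL import Image
from torchvision import transforms as T

img = Image.open("some_image.jpg").convert("RGB")                       # placeholder path
x = T.Compose([T.Resize((416, 416)), T.ToTensor()])(img).unsqueeze(0)   # 416 assumed to match cfg.IMAGE_SIZE

with torch.no_grad():
    out = model(x)

bboxes = []
for i in range(3):
    S = out[i].shape[2]
    bboxes += cells_to_bboxes(out[i], scaled_anchors[i], S=S, is_preds=True)[0]

nms_boxes = non_max_suppression(bboxes, iou_threshold=0.5, threshold=0.6, box_format="midpoint")
class_labels = cfg.COCO_LABELS if cfg.DATASET == 'COCO' else cfg.PASCAL_CLASSES
annotated = draw_predictions(np.array(img), nms_boxes, class_labels=class_labels)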