""" |
|
Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets |
|
""" |
|
|
|
import os |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import torch |
|
from PIL import Image, ImageFile |
|
from torch.utils.data import Dataset |
|
|
|
ImageFile.LOAD_TRUNCATED_IMAGES = True |
|
|
|
|
|
class YOLODataset(Dataset):
    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=[13, 26, 52],
        transform=None,
        load_mosaic=True,
    ):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.transform = transform
        self.S = S
        self.load_mosaic = load_mosaic

        self.anchors = torch.tensor(anchors)
        self.num_anchors_per_scale = self.anchors.shape[1]
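        # Note: anchors is assumed to arrive with shape
        # [n_scales, n_anchors_per_scale, 2] (trailing dim = width, height),
        # so shape[1] above is the anchor count per scale.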

    def __len__(self):
        return len(self.annotations)

    @staticmethod
    def iou(box, anchors):
        """
        Compute the width/height IoU between one box and every anchor, i.e.
        the IoU as if all boxes shared the same centre.

        box:
            tensor shape: [2]
        anchors:
            tensor shape: [number of scales, number of anchors, 2]

        * 2 above is for width and height
        """
        intersection = torch.prod(torch.min(box, anchors), dim=-1)
        union = torch.prod(box) + torch.prod(anchors, dim=-1) - intersection
        return intersection / union
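
    # Example with hypothetical numbers: for box = torch.tensor([0.4, 0.3])
    # and anchors of shape [3, 3, 2], iou broadcasts the box against every
    # anchor and returns a [3, 3] tensor with one width/height IoU per anchor
    # at each scale.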

    def __getitem__(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])

        bboxes = np.roll(
            np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
        )
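        # Label rows are assumed to follow the YOLO txt convention
        # [class, cx, cy, w, h]; rolling by 4 along axis 1 moves the class id
        # to the end, giving [cx, cy, w, h, class].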
|
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0]) |
|
image = np.array(Image.open(img_path).convert("RGB")) |
|
|
|
if self.transform: |
|
augmentations = self.transform(image=image, bboxes=bboxes) |
|
image = augmentations["image"] |
|
bboxes = augmentations["bboxes"] |
|
|
|
""" |
|
Below assumes 3 scale predictions (as paper) and same num of anchors per scale |
|
6 = [objectness, cx, cy, w, h, class] |
|
""" |
|
targets = [torch.zeros((self.num_anchors_per_scale, S, S, 6)) for S in self.S] |

        for bbox in bboxes:
            iou = self.iou(torch.tensor(bbox[2:4]), self.anchors)

            idx = torch.argsort(iou, descending=True, dim=-1)
            idx = idx[:, 0].tolist()
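            # idx now holds, per scale, the index of the anchor with the
            # highest IoU against this box's width and height.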

            dimensions, class_ = np.array(bbox[:-1]), bbox[-1]

            for scale_idx, anchor_id in enumerate(idx):
                scale_dim = self.S[scale_idx]
                scale_cx, scale_cy, scale_w, scale_h = dimensions * scale_dim
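
                # Grid cell that owns the box centre; the centre is then
                # re-expressed as an offset within that cell.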
                # int() floors the scaled centre into a grid cell; clamping
                # keeps a centre that lands exactly on the image border
                # (cx or cy == 1.0) inside the grid.
                row = min(int(scale_cy), scale_dim - 1)
                col = min(int(scale_cx), scale_dim - 1)

                # Centre offsets relative to the top-left of the owning cell.
                scale_cx = scale_cx - col
                scale_cy = scale_cy - row

                box_target = torch.tensor(
                    [1, scale_cx, scale_cy, scale_w, scale_h, class_]
                )

                targets[scale_idx][anchor_id, row, col] = box_target

        return image, targets


if __name__ == "__main__":
    from src.run.yolov3 import config

    IMAGE_SIZE = config.IMAGE_SIZE
    train_dataset = YOLODataset(
        config.DATASET + "/2examples.csv",
        transform=config.train_transforms,
        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
        img_dir=config.IMG_DIR,
        label_dir=config.LABEL_DIR,
        anchors=config.ANCHORS,
    )
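
    # A minimal smoke test, assuming the paths in config point at real data:
    # pull one sample and check that each scale's target comes out with shape
    # (num_anchors_per_scale, S, S, 6).
    image, targets = train_dataset[0]
    for t in targets:
        print(t.shape)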