File size: 4,223 Bytes
1ba06ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import glob
import random
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
##import matplotlib.pyplot as plt
##import matplotlib.patches as patches
from skimage.transform import resize
import sys
class ImageFolder(Dataset):
def __init__(self, folder_path, img_size=416):
self.files = sorted(glob.glob('%s/*.*' % folder_path))
self.img_shape = (img_size, img_size)
def __getitem__(self, index):
img_path = self.files[index % len(self.files)]
# Extract image
img = np.array(Image.open(img_path))
h, w, _ = img.shape
dim_diff = np.abs(h - w)
# Upper (left) and lower (right) padding
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
# Determine padding
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
# Add padding
input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
# Resize and normalize
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
# Channels-first
input_img = np.transpose(input_img, (2, 0, 1))
# As pytorch tensor
input_img = torch.from_numpy(input_img).float()
return img_path, input_img
def __len__(self):
return len(self.files)
class ListDataset(Dataset):
def __init__(self, list_path, img_size=416):
with open(list_path, 'r') as file:
self.img_files = file.readlines()
self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
self.img_shape = (img_size, img_size)
self.max_objects = 50
def __getitem__(self, index):
#---------
# Image
#---------
img_path = self.img_files[index % len(self.img_files)].rstrip()
img = np.array(Image.open(img_path))
# Handles images with less than three channels
while len(img.shape) != 3:
index += 1
img_path = self.img_files[index % len(self.img_files)].rstrip()
img = np.array(Image.open(img_path))
h, w, _ = img.shape
dim_diff = np.abs(h - w)
# Upper (left) and lower (right) padding
pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
# Determine padding
pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
# Add padding
input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
padded_h, padded_w, _ = input_img.shape
# Resize and normalize
input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
# Channels-first
input_img = np.transpose(input_img, (2, 0, 1))
# As pytorch tensor
input_img = torch.from_numpy(input_img).float()
#---------
# Label
#---------
label_path = self.label_files[index % len(self.img_files)].rstrip()
labels = None
if os.path.exists(label_path):
labels = np.loadtxt(label_path).reshape(-1, 5)
# Extract coordinates for unpadded + unscaled image
x1 = w * (labels[:, 1] - labels[:, 3]/2)
y1 = h * (labels[:, 2] - labels[:, 4]/2)
x2 = w * (labels[:, 1] + labels[:, 3]/2)
y2 = h * (labels[:, 2] + labels[:, 4]/2)
# Adjust for added padding
x1 += pad[1][0]
y1 += pad[0][0]
x2 += pad[1][0]
y2 += pad[0][0]
# Calculate ratios from coordinates
labels[:, 1] = ((x1 + x2) / 2) / padded_w
labels[:, 2] = ((y1 + y2) / 2) / padded_h
labels[:, 3] *= w / padded_w
labels[:, 4] *= h / padded_h
# Fill matrix
filled_labels = np.zeros((self.max_objects, 5))
if labels is not None:
filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
filled_labels = torch.from_numpy(filled_labels)
return img_path, input_img, filled_labels
def __len__(self):
return len(self.img_files)
|