|
import os |
|
import numpy as np |
|
from PIL import Image |
|
from torch.utils.data import DataLoader, Dataset |
|
from torch.utils.data.sampler import SubsetRandomSampler |
|
import torch |
|
import torchvision.transforms as transforms |
|
from tqdm import tqdm |
|
from sklearn.model_selection import train_test_split |
|
from typing import List, Optional |
|
from glob import glob |
|
|
|
|
|
|
|
class Dataset_3DCNN(Dataset): |
|
"Characterizes a dataset for PyTorch" |
|
def __init__(self, |
|
path : str, |
|
folders : List[str], |
|
labels : List[float], |
|
frames : List[int], |
|
transform : Optional[transforms.Compose] = None): |
|
"Initialization" |
|
self.path = path |
|
self.labels = labels |
|
self.folders = folders |
|
self.transform = transform |
|
self.frames = frames |
|
|
|
def __len__(self): |
|
"Denotes the total number of samples" |
|
return len(self.folders) |
|
|
|
def read_images(self, path, selected_folder, use_transform): |
|
X = [] |
|
for i in self.frames: |
|
image = Image.open(os.path.join(path, selected_folder, 'frame_{:01d}.jpg'.format(i))).convert('L') |
|
|
|
if use_transform is not None: |
|
image = use_transform(image) |
|
else: |
|
image = transforms.ToTensor()(image) |
|
|
|
X.append(image.squeeze_(0)) |
|
X = torch.stack(X, dim=0) |
|
|
|
return X |
|
|
|
def __getitem__(self, index): |
|
"Generates one sample of data" |
|
|
|
folder = self.folders[index] |
|
|
|
|
|
X = self.read_images(self.path, folder, self.transform).unsqueeze_(0) |
|
y = torch.LongTensor([self.labels[index]]) |
|
|
|
|
|
return X, y |
|
|
|
|
|
|
|
|
|
|
|
def create_datasets(path : str = r'D:\All_files\pys\AI_algos\Mikes_Work\viscosity-video-classification\code_digdiscovery\new_honey_164', |
|
validation_split : float = 0.2, |
|
test_split : float = 0.2, |
|
batch_size : int = 32, |
|
transform : transforms.Compose = transforms.Compose([transforms.Resize([256, 342]), |
|
transforms.ToTensor(), |
|
transforms.Normalize(mean=[0.5], std=[0.5])]), |
|
random_seed : int = 112, |
|
shuffle : bool = True, |
|
selected_frames : List[int] = [0,10,20]): |
|
|
|
|
|
all_X_list = [filename for filename in os.listdir(path)] |
|
all_y_list = [int(filename) for filename in os.listdir(path)] |
|
|
|
|
|
train_list, test_list, train_label, test_label = train_test_split(all_X_list, all_y_list, test_size=test_split, random_state=random_seed) |
|
|
|
|
|
|
|
train_set, test_set = Dataset_3DCNN(path, train_list, train_label, selected_frames, transform=transform), \ |
|
Dataset_3DCNN(path, test_list, test_label, selected_frames, transform=transform) |
|
print('length test set ', len(test_set)) |
|
|
|
|
|
num_train = len(train_list) |
|
indices = list(range(num_train)) |
|
|
|
if shuffle : |
|
np.random.seed(random_seed) |
|
np.random.shuffle(indices) |
|
|
|
split = int(np.floor(validation_split * num_train)) |
|
train_idx, valid_idx = indices[split:], indices[:split] |
|
|
|
train_sampler = SubsetRandomSampler(train_idx) |
|
|
|
valid_sampler = SubsetRandomSampler(valid_idx) |
|
|
|
|
|
train_loader = DataLoader(train_set, |
|
batch_size=batch_size, |
|
sampler=train_sampler, |
|
num_workers=0) |
|
valid_loader = DataLoader(train_set, |
|
batch_size=batch_size, |
|
sampler=valid_sampler, |
|
num_workers=0) |
|
|
|
test_loader = DataLoader(test_set, |
|
batch_size=batch_size, |
|
num_workers=0) |
|
|
|
|
|
|
|
return train_loader, test_loader, valid_loader |