from utils.img_utils import (normalizedepth, random_crop_pad_to_shape,
                             random_mirror, random_scale, normalize,
                             resizedepth, resizergb, tfnyu_normalizedepth)

class RGBTrainPre(object):
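    """Training-time preprocessing for RGB input: random horizontal mirror,
    optional random scaling, random crop/pad to the configured crop size,
    mean/std normalization, and HxWxC -> CxHxW transpose."""
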
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.train_scale_array = dataset_settings['train_scale_array']
        self.crop_size = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, gt):
        transformed_dict = random_mirror({"rgb": rgb, "gt": gt})
        if self.train_scale_array is not None:
            transformed_dict, _ = random_scale(transformed_dict, self.train_scale_array, (rgb.shape[0], rgb.shape[1]))

        transformed_dict, _ = random_crop_pad_to_shape(transformed_dict, transformed_dict['rgb'].shape[:2], self.crop_size)  # also expands gt to HxWx1
        rgb = transformed_dict['rgb']
        gt = transformed_dict['gt']
        rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)

        rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return rgb, gt


class RGBValPre(object):
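    """Validation-time preprocessing for RGB input: resize to the model
    input shape, mean/std normalization, and HxWxC -> CxHxW transpose;
    the ground truth is returned unchanged."""
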
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.model_input_shape = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, gt):
        rgb = resizergb(rgb, self.model_input_shape)
        rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)
        rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return rgb, gt


class RGBDTrainPre(object):
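    """Training-time preprocessing for RGB-D input: the RGBTrainPre
    augmentations applied jointly to the RGB image, depth map, and ground
    truth, with depth normalized via normalizedepth."""
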
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.train_scale_array = dataset_settings['train_scale_array']
        self.crop_size = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, depth, gt):
        transformed_dict = random_mirror({"rgb": rgb, "depth": depth, "gt": gt})
        if self.train_scale_array is not None:
            transformed_dict, _ = random_scale(transformed_dict, self.train_scale_array, (rgb.shape[0], rgb.shape[1]))

        transformed_dict, _ = random_crop_pad_to_shape(transformed_dict, transformed_dict['rgb'].shape[:2], self.crop_size)  # also expands gt to HxWx1
        rgb = transformed_dict['rgb']
        depth = transformed_dict['depth']
        gt = transformed_dict['gt']
        rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)
        depth = normalizedepth(depth)
        rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return rgb, depth, gt


class RGBDValPre(object):
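    """Validation-time preprocessing for RGB-D input: resize, normalize,
    and transpose each modality independently; a modality passed as None
    is returned as None."""
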
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.model_input_shape = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, depth):
        if rgb is not None:
            rgb = resizergb(rgb, self.model_input_shape)
            rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)
            rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        if depth is not None:
            depth = resizedepth(depth, self.model_input_shape)
            depth = normalizedepth(depth)
            depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW

        return rgb, depth


class NYURGBDTrainPre(object):
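    """NYU variant of RGBDTrainPre: identical augmentation pipeline, but
    depth is normalized with tfnyu_normalizedepth."""
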
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.train_scale_array = dataset_settings['train_scale_array']
        self.crop_size = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, depth, gt):
        transformed_dict = random_mirror({"rgb": rgb, "depth": depth, "gt": gt})
        if self.train_scale_array is not None:
            transformed_dict, _ = random_scale(transformed_dict, self.train_scale_array, (rgb.shape[0], rgb.shape[1]))

        transformed_dict, _ = random_crop_pad_to_shape(transformed_dict, transformed_dict['rgb'].shape[:2], self.crop_size)  # also expands gt to HxWx1
        rgb = transformed_dict['rgb']
        depth = transformed_dict['depth']
        gt = transformed_dict['gt']
        rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)
        depth = tfnyu_normalizedepth(depth)
        rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return rgb, depth, gt


class NYURGBDValPre(object):
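    """NYU variant of the RGB-D validation preprocessing: resize and
    normalize both modalities (depth via tfnyu_normalizedepth), transpose
    to CxHxW, and return the ground truth unchanged."""
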
    def __init__(self, pytorch_mean, pytorch_std, dataset_settings):
        self.pytorch_mean = pytorch_mean
        self.pytorch_std = pytorch_std
        self.model_input_shape = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, rgb, depth, gt):
        rgb = resizergb(rgb, self.model_input_shape)
        depth = resizedepth(depth, self.model_input_shape)
        rgb = normalize(rgb, self.pytorch_mean, self.pytorch_std)
        depth = tfnyu_normalizedepth(depth)
        rgb = rgb.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return rgb, depth, gt


class DepthTrainPre(object):
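    """Training-time preprocessing for depth-only input: random mirror,
    optional random scaling, random crop/pad, depth normalization, and
    HxWxC -> CxHxW transpose."""
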
    def __init__(self, dataset_settings):
        self.train_scale_array = dataset_settings['train_scale_array']
        self.crop_size = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, depth, gt):
        transformed_dict = random_mirror({"depth": depth, "gt": gt})
        if self.train_scale_array is not None:
            transformed_dict, _ = random_scale(transformed_dict, self.train_scale_array, (depth.shape[0], depth.shape[1]))

        transformed_dict, _ = random_crop_pad_to_shape(transformed_dict, transformed_dict['depth'].shape[:2], self.crop_size)  # also expands gt to HxWx1
        depth = transformed_dict['depth']
        gt = transformed_dict['gt']
        depth = normalizedepth(depth)
        depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return depth, gt


class DepthValPre(object):
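    """Validation-time preprocessing for depth-only input: resize to the
    model input shape, depth normalization, and HxWxC -> CxHxW transpose;
    the ground truth is returned unchanged."""
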
    def __init__(self, dataset_settings):
        self.model_input_shape = (dataset_settings['image_height'], dataset_settings['image_width'])

    def __call__(self, depth, gt):
        depth = resizedepth(depth, self.model_input_shape)
        depth = normalizedepth(depth)
        depth = depth.transpose(2, 0, 1)  # move channels to the front: HxWxC -> CxHxW
        return depth, gt
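

# Minimal usage sketch. Assumptions: the ImageNet mean/std values, the scale
# set, and the dummy input shapes below are illustrative only; the sketch
# runs only inside this repository, since it exercises utils.img_utils.
if __name__ == "__main__":
    import numpy as np

    dataset_settings = {
        'train_scale_array': [0.75, 1.0, 1.25],  # hypothetical scale choices
        'image_height': 480,
        'image_width': 640,
    }
    mean = np.array([0.485, 0.456, 0.406])  # torchvision ImageNet statistics
    std = np.array([0.229, 0.224, 0.225])

    pre = RGBDTrainPre(mean, std, dataset_settings)
    rgb = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    depth = np.random.rand(480, 640, 1).astype(np.float32)  # assumed HxWx1
    gt = np.random.randint(0, 40, (480, 640), dtype=np.uint8)
    rgb, depth, gt = pre(rgb, depth, gt)
    # Expected: rgb (3, 480, 640), depth (1, 480, 640), gt (480, 640, 1)
    print(rgb.shape, depth.shape, gt.shape)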