File size: 6,021 Bytes

dc7407d

import torch
from torch import nn
from utils.datastruct import NetData
from models.feedforward import LinLayers


class CNN3D_Mike(nn.Module):
    def __init__(self, t_dim=30, img_x=256 , img_y=342, drop_p=0, fc_hidden1=256, fc_hidden2=256):
        super(CNN3D_Mike, self).__init__()        # set video dimension
        self.t_dim = t_dim
        self.img_x = img_x
        self.img_y = img_y
        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p
        #self.num_classes = num_classes
        self.ch1, self.ch2 = 32, 48
        self.k1, self.k2 = (5, 5, 5), (3, 3, 3)  # 3d kernel size
        self.s1, self.s2 = (2, 2, 2), (2, 2, 2)  # 3d strides
        self.pd1, self.pd2 = (0, 0, 0), (0, 0, 0)  # 3d padding        # compute conv1 & conv2 output shape
        self.conv1_outshape = conv3D_output_size((self.t_dim, self.img_x, self.img_y), self.pd1, self.k1, self.s1)
        self.conv2_outshape = conv3D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)        
        self.conv1 = nn.Conv3d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1,
                               padding=self.pd1)
        self.bn1 = nn.BatchNorm3d(self.ch1)
        self.conv2 = nn.Conv3d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2,
                               padding=self.pd2)
        self.bn2 = nn.BatchNorm3d(self.ch2)
        self.relu = nn.ReLU(inplace=True)
        self.drop = nn.Dropout3d(self.drop_p)
        self.pool = nn.MaxPool3d(2)
        self.fc1 = nn.Linear(self.ch2*self.conv2_outshape[0]*self.conv2_outshape[1]*self.conv2_outshape[2],
                             self.fc_hidden1)  # fully connected hidden layer
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.fc_hidden2,1)  # fully connected layer, output = multi-classes 
        
        
    def forward(self, x_3d):
        # Conv 1
        x = self.conv1(x_3d)
       
        x = self.bn1(x)
        x = self.relu(x)
        x = self.drop(x)
        # Conv 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.drop(x)
        # FC 1 and 2
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        #x = F.relu(self.fc3(x))
        #x = F.relu(self.fc3(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        #x = self.fc3(x)
        #x = F.softmax(self.fc2(x))
        
        x = self.fc3(x) 
        
        
        
        return x

    

class CNNLayers(nn.Module):

    def __init__(self, args):
    
        super(CNNLayers, self).__init__()
        
        self.in_dim = args.in_dim# 1/3
        self.n_f = args.n_f#[32,64]
        self.kernel_size = args.kernel_size # [(5,5,5), (3,3,3)]
        self.activations = args.activations#['relu', 'relu']
        self.bns = args.bns #[True, True], 
        self.dropouts = args.dropouts #[True, True]
        #self.dropouts_ps = args.dropouts_ps#[0.5,.7]
        self.paddings = args.paddings #[(0,0,0),(0,0,0)]
        self.strides = args.strides # strides [(1,1,1),(1,1,1),(1,1,1)])
        #self.poolings = args.poolings
        
        assert len(self.n_f) == len(self.activations) == len(self.bns) == len(self.dropouts), 'dimensions mismatch : check dimensions!'
        
        # generate layers seq of seq 
        self._get_layers()
       
    def _get_layers(self):
        
        layers =nn.ModuleList()
        in_channels = self.in_dim
        
        for idx, chans in enumerate(self.n_f):
            sub_layers = nn.ModuleList()                            
                                        
            sub_layers.append(nn.Conv3d(in_channels = in_channels,
                                        out_channels = chans, #self.n_f[idx],
                                        kernel_size = self.kernel_size[idx],
                                        stride = self.strides[idx],
                                        padding = self.paddings[idx]
                                        ))
                                        


            if self.bns[idx] : sub_layers.append(nn.BatchNorm3d(num_features = self.n_f[idx]))

            #if self.dropouts[idx] : sub_layers.append(nn.Dropout3d(p = self.dropouts_ps[idx]))
            
            if self.dropouts[idx] : sub_layers.append(nn.Dropout3d(p = self.dropouts[idx]))

            #if self.activations[idx]  : sub_layers.append(self.__class__.get_activation(self.activations[idx]))
            
            if self.activations[idx]  : sub_layers.append(self.activations[idx])
            
            sub_layers = nn.Sequential(*sub_layers) 
            
            layers.append(sub_layers)
            
            in_channels = self.n_f[idx]
    
        self.layers = nn.Sequential(*layers)
        
        
    @staticmethod
    def get_activation(activation):
        if activation == 'relu':
            activation=nn.ReLU()
        elif activation == 'leakyrelu':
            activation=nn.LeakyReLU(negative_slope=0.1)
        elif activation == 'selu':
            activation=nn.SELU()
        
        return activation
        
        
        
    def forward(self, x):
        
        x = self.layers(x)
        
        return x 
        
        

class CNN3D(nn.Module):

    def __init__(self, args):
        super(CNN3D,self).__init__()
        # check datatype
        if not isinstance(args, NetData):
            raise TypeError("input must be a ParserClass")
            
        self.cnn3d = CNNLayers(args.cnn3d)

        self.lin = LinLayers(args.lin)
        
        self.in_dim = args.lin.in_dim
        
        
    def forward(self, x):
        
        # cnn 3d
        x = self.cnn3d(x)
        
        x = x.view(-1, self.in_dim)
        
        # feedforward
        x = self.lin(x)
        
        return x