|
import torch |
|
from torch import nn |
|
|
|
|
|
class TimeDepthSeparableConv(nn.Module):
    """Residual time-depth separable 1D convolution layer.

    Based on https://arxiv.org/pdf/1904.02619.pdf, which reports competitive
    results with less computation and a smaller memory footprint.

    The layer chains three convolutions, each followed by batch
    normalization:

    1. a pointwise (kernel size 1) convolution to ``2 * hid_channels``
       channels, gated back down to ``hid_channels`` by a GLU;
    2. a depthwise convolution (``groups=hid_channels``) with kernel width
       ``kernel_size``, followed by an ``x * sigmoid(x)`` activation;
    3. a pointwise convolution to ``out_channels`` channels.

    The layer input is then added to the result.

    Args:
        in_channels: Number of channels of the input.
        hid_channels: Number of channels used by the depthwise convolution.
        out_channels: Number of channels produced by the last convolution.
        kernel_size: Kernel width of the depthwise convolution. Its padding
            is ``(kernel_size - 1) // 2``.
        bias: Whether the three convolutions use a bias term.
    """

    def __init__(self, in_channels, hid_channels, out_channels, kernel_size, bias=True):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.hid_channels = hid_channels
        self.kernel_size = kernel_size

        # Settings shared by both pointwise (1x1) convolutions.
        pointwise_kwargs = dict(kernel_size=1, stride=1, padding=0, bias=bias)
        # Twice the hidden width, because the GLU in forward() halves it.
        gated_channels = 2 * hid_channels

        self.time_conv = nn.Conv1d(in_channels, gated_channels, **pointwise_kwargs)
        self.norm1 = nn.BatchNorm1d(gated_channels)

        # groups == channels: every channel is filtered independently.
        self.depth_conv = nn.Conv1d(
            hid_channels,
            hid_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=hid_channels,
            bias=bias,
        )
        self.norm2 = nn.BatchNorm1d(hid_channels)

        self.time_conv2 = nn.Conv1d(hid_channels, out_channels, **pointwise_kwargs)
        self.norm3 = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        """Apply the layer and add the input back to the result.

        Args:
            x: Input tensor with ``in_channels`` channels on dim 1 (the GLU
                splits along ``dim=1``).

        Returns:
            ``x`` plus the transformed features. The two must be compatible
            for addition, so the output of the last convolution has to match
            (or broadcast against) the shape of ``x``.
        """
        residual = x

        # Pointwise expansion, then GLU halves the channel dimension.
        gated = nn.functional.glu(self.norm1(self.time_conv(x)), dim=1)

        # Depthwise convolution followed by the x * sigmoid(x) activation.
        filtered = self.norm2(self.depth_conv(gated))
        activated = filtered * torch.sigmoid(filtered)

        projected = self.norm3(self.time_conv2(activated))
        return residual + projected
|
|
|
|
|
class TimeDepthSeparableConvBlock(nn.Module):
    """Stack of ``TimeDepthSeparableConv`` layers applied to masked inputs.

    The first layer maps ``in_channels`` to ``hid_channels``, intermediate
    layers keep ``hid_channels``, and the last layer maps ``hid_channels`` to
    ``out_channels``. Every layer uses ``hid_channels`` as its hidden width.

    Args:
        in_channels: Input channels of the first layer.
        hid_channels: Hidden channels of every layer, and the channel count
            passed between layers.
        out_channels: Output channels of the last layer.
        num_layers: Number of stacked layers; must be greater than 1.
        kernel_size: Kernel size forwarded to every layer; must be odd.
        bias: Forwarded to every layer.

    Raises:
        ValueError: If ``kernel_size`` is not odd or ``num_layers`` is not
            greater than 1.
    """

    def __init__(
        self,
        in_channels,
        hid_channels,
        out_channels,
        num_layers,
        kernel_size,
        bias=True,
    ):
        super().__init__()

        # Explicit raises rather than ``assert`` so the checks are still
        # enforced when Python runs with -O (which strips assert statements).
        if (kernel_size - 1) % 2 != 0:
            raise ValueError(f"kernel_size must be odd, got {kernel_size}")
        if num_layers <= 1:
            raise ValueError(f"num_layers must be greater than 1, got {num_layers}")

        # num_layers >= 2 is guaranteed here, so there is always a distinct
        # first and last layer; only the last one projects to out_channels.
        layers = [
            TimeDepthSeparableConv(
                in_channels, hid_channels, hid_channels, kernel_size, bias
            )
        ]
        for _ in range(num_layers - 2):
            layers.append(
                TimeDepthSeparableConv(
                    hid_channels, hid_channels, hid_channels, kernel_size, bias
                )
            )
        layers.append(
            TimeDepthSeparableConv(
                hid_channels, hid_channels, out_channels, kernel_size, bias
            )
        )
        self.layers = nn.ModuleList(layers)

    def forward(self, x, mask):
        """Run ``x`` through every layer, masking each layer's input.

        Args:
            x: Input passed to the first layer.
            mask: Tensor multiplied element-wise into the input of every
                layer (``x * mask``).

        Returns:
            The output of the last layer. Note that the mask is applied to
            layer inputs only; the returned tensor itself is not masked.
        """
        for layer in self.layers:
            x = layer(x * mask)
        return x
|
|