|
import torch.nn.functional as F |
|
from torch import nn |
|
|
|
|
|
class PreactResBlock(nn.Sequential):
    """Pre-activation residual block.

    Applies (GroupNorm -> GELU -> 3x3 Conv2d) twice and adds the block input
    to the result. Both convolutions map ``dim`` channels to ``dim`` channels
    with padding 1, so the output has the same shape as the input.

    Args:
        dim: number of input and output channels.
    """

    def __init__(self, dim):
        # Target roughly 16 channels per group. GroupNorm needs at least one
        # group and a group count that divides the channel count, so fall back
        # to the largest valid divisor not exceeding ``dim // 16``. Widths for
        # which ``dim // 16`` already divides ``dim`` keep that group count.
        num_groups = max(dim // 16, 1)
        while dim % num_groups:
            num_groups -= 1

        super().__init__(
            nn.GroupNorm(num_groups, dim),
            nn.GELU(),
            nn.Conv2d(dim, dim, 3, padding=1),
            nn.GroupNorm(num_groups, dim),
            nn.GELU(),
            nn.Conv2d(dim, dim, 3, padding=1),
        )

    def forward(self, x):
        """Return ``x`` plus the output of the sequential norm/activation/conv stack.

        Args:
            x: (b c h w) tensor with ``c == dim``.
        Returns:
            Tensor with the same shape as ``x``.
        """
        return x + super().forward(x)
|
|
|
|
|
class UNetBlock(nn.Module):
    """One U-Net stage: optional upsample, skip merge, conv, two residual blocks, optional downsample.

    Args:
        input_dim: channels of the incoming feature map.
        output_dim: channels produced by the stage; defaults to ``input_dim``.
        scale_factor: a value above 1 resizes the input with ``nn.Upsample``
            before anything else happens; a value below 1 resizes the returned
            output after the residual blocks; any other value leaves both
            resampling steps as identity.
    """

    def __init__(self, input_dim, output_dim=None, scale_factor=1.0):
        super().__init__()
        width = input_dim if output_dim is None else output_dim

        self.pre_conv = nn.Conv2d(input_dim, width, 3, padding=1)
        self.res_block1 = PreactResBlock(width)
        self.res_block2 = PreactResBlock(width)

        # Only one of the two resampling slots is ever active; the other
        # (or both, when no rescaling is requested) is a shared pass-through.
        passthrough = nn.Identity()
        self.downsample = nn.Upsample(scale_factor=scale_factor) if scale_factor < 1 else passthrough
        self.upsample = nn.Upsample(scale_factor=scale_factor) if scale_factor > 1 else passthrough

    def forward(self, x, h=None):
        """
        Args:
            x: (b c h w), output of the previous stage
            h: optional skip input; must have exactly the shape of ``x`` after upsampling
        Returns:
            o: stage output, i.e. ``s`` passed through the downsampling step
            s: features before downsampling, usable as a skip connection
        """
        merged = self.upsample(x)
        if h is not None:
            assert merged.shape == h.shape, f"{merged.shape} != {h.shape}"
            merged = merged + h

        skip = merged
        for layer in (self.pre_conv, self.res_block1, self.res_block2):
            skip = layer(skip)

        return self.downsample(skip), skip
|
|
|
|
|
class UNet(nn.Module):
    """U-Net built from ``UNetBlock`` stages with additive skip connections.

    The encoder doubles the channel count and halves the resolution at every
    stage, the decoder mirrors it, and ``num_middle_blocks`` stages run at the
    coarsest resolution in between.

    Args:
        input_dim: channels of the input image.
        output_dim: channels of the prediction.
        hidden_dim: channels after the input projection (width at depth 0).
        num_blocks: number of encoder stages (and of decoder stages).
        num_middle_blocks: number of stages at the bottleneck.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=16, num_blocks=4, num_middle_blocks=2):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        # widths[k] is the channel count at depth k (k = 0 is full resolution).
        widths = [hidden_dim * 2**depth for depth in range(num_blocks + 1)]
        # (shallow, deep) channel pairs for each encoder stage, top to bottom.
        stages = list(zip(widths[:-1], widths[1:]))

        self.input_proj = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
        self.encoder_blocks = nn.ModuleList(
            [UNetBlock(input_dim=shallow, output_dim=deep, scale_factor=0.5) for shallow, deep in stages]
        )
        self.middle_blocks = nn.ModuleList(
            [UNetBlock(input_dim=widths[-1]) for _ in range(num_middle_blocks)]
        )
        # The decoder walks the same stages bottom to top with the roles swapped.
        self.decoder_blocks = nn.ModuleList(
            [UNetBlock(input_dim=deep, output_dim=shallow, scale_factor=2) for shallow, deep in reversed(stages)]
        )
        self.head = nn.Sequential(
            nn.Conv2d(hidden_dim, hidden_dim, 3, padding=1),
            nn.GELU(),
            nn.Conv2d(hidden_dim, output_dim, 1),
        )

    @property
    def scale_factor(self):
        """Total downsampling factor of the encoder: 2 to the number of encoder stages."""
        return 1 << len(self.encoder_blocks)

    def pad_to_fit(self, x):
        """
        Pad the bottom and right edges so both spatial sizes are multiples of ``scale_factor``.

        Args:
            x: (b c h w), input
        Returns:
            x: (b c h' w'), padded input
        """
        multiple = self.scale_factor
        height, width = x.shape[2], x.shape[3]
        # -n % m is the distance from n up to the next multiple of m (0 if already aligned).
        pad_bottom = -height % multiple
        pad_right = -width % multiple
        return F.pad(x, (0, pad_right, 0, pad_bottom))

    def forward(self, x):
        """
        Args:
            x: (b c h w), input
        Returns:
            o: (b c' h w), output with ``c' = output_dim`` and the input's spatial size
        """
        height, width = x.shape[2], x.shape[3]

        x = self.input_proj(self.pad_to_fit(x))

        # Encoder: remember each stage's pre-downsample features for the decoder.
        skips = []
        for encoder in self.encoder_blocks:
            x, skip = encoder(x)
            skips.append(skip)

        for middle in self.middle_blocks:
            x, _ = middle(x)

        # Decoder: consume the skips deepest-first.
        for decoder, skip in zip(self.decoder_blocks, reversed(skips)):
            x, _ = decoder(x, skip)

        # Crop away whatever pad_to_fit added.
        return self.head(x)[..., :height, :width]

    def test(self, shape=(3, 512, 256)):
        """Print the MAC and parameter counts reported by ``ptflops`` for this model.

        Args:
            shape: input shape handed to ``ptflops.get_model_complexity_info``
                (presumably (c, h, w) without a batch dimension; confirm against ptflops).
        """
        import ptflops

        report_options = dict(as_strings=True, print_per_layer_stat=True, verbose=True)
        macs, params = ptflops.get_model_complexity_info(self, shape, **report_options)

        print(f"macs: {macs}")
        print(f"params: {params}")
|
|
|
|
|
def main():
    """Build a UNet with 3 input and 3 output channels and print its complexity report."""
    UNet(3, 3).test()
|
|
|
|
|
# Run the complexity report only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|