from torch import nn
import torch.nn.functional as F
import torch

# from sync_batchnorm import SynchronizedBatchNorm2d as BatchNorm2d


# class ResBlock2d(nn.Module):
#     def __init__(self, in_features, kernel_size, padding):
#         super(ResBlock2d, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
#                                padding=padding)
#         self.conv2 = nn.Conv2d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size,
#                                padding=padding)
#         self.norm1 = BatchNorm2d(in_features)
#         self.norm2 = BatchNorm2d(in_features)
#         self.relu = nn.ReLU()
#     def forward(self, x):
#         out = self.norm1(x)
#         out = self.relu(out)
#         out = self.conv1(out)
#         out = self.norm2(out)
#         out = self.relu(out)
#         out = self.conv2(out)
#         out += x
#         return out


class UpBlock2d(nn.Module):
    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
        super(UpBlock2d, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
                              padding=padding, groups=groups)
        # self.norm = BatchNorm2d(out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = x
        # out = F.interpolate(x, scale_factor=2)  # upsampling disabled: block keeps spatial size
        out = self.conv(out)
        # out = self.norm(out)
        out = self.relu(out)
        return out


class DownBlock2d(nn.Module):
    def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
        super(DownBlock2d, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
                              padding=padding, groups=groups)
        # self.norm = BatchNorm2d(out_features)
        # self.pool = nn.AvgPool2d(kernel_size=(2, 2))
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv(x)
        # out = self.norm(out)
        out = self.relu(out)
        # out = self.pool(out)  # pooling disabled: block keeps spatial size
        return out


class SameBlock2d(nn.Module):
    def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1):
        super(SameBlock2d, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features,
                              kernel_size=kernel_size, padding=padding, groups=groups)
        # self.norm = BatchNorm2d(out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv(x)
        # out = self.norm(out)
        out = self.relu(out)
        return out
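

# A minimal shape sanity-check sketch (an illustration, not part of the model):
# with pooling and interpolation commented out above, all three conv blocks
# preserve spatial resolution and only change the channel count.
def _check_block_shapes():
    x = torch.randn(1, 8, 16, 16)
    assert SameBlock2d(8, 16)(x).shape == (1, 16, 16, 16)
    assert DownBlock2d(8, 16)(x).shape == (1, 16, 16, 16)  # no AvgPool2d: size kept
    assert UpBlock2d(8, 16)(x).shape == (1, 16, 16, 16)    # no F.interpolate: size kept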


class HourglassEncoder(nn.Module):
    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(HourglassEncoder, self).__init__()
        down_blocks = []
        for i in range(num_blocks):
            down_blocks.append(DownBlock2d(in_features if i == 0 else min(max_features, block_expansion * (2 ** i)),
                                           min(max_features, block_expansion * (2 ** (i + 1))),
                                           kernel_size=3, padding=1))
        self.down_blocks = nn.ModuleList(down_blocks)

    def forward(self, x):
        outs = [x]
        for down_block in self.down_blocks:
            outs.append(down_block(outs[-1]))
        outs = outs[1:]  # drop the raw input; keep one feature map per block for the decoder skips
        return outs


class HourglassDecoder(nn.Module):
    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(HourglassDecoder, self).__init__()
        up_blocks = []
        for i in reversed(range(num_blocks)):
            # The deepest block takes the encoder output alone; every later block also
            # receives the concatenated skip connection, doubling its input channels.
            in_filters = (1 if i == num_blocks - 1 else 2) * min(max_features, block_expansion * (2 ** (i + 1)))
            out_filters = min(max_features, block_expansion * (2 ** i))
            up_blocks.append(UpBlock2d(in_filters, out_filters, kernel_size=3, padding=1))
        self.up_blocks = nn.ModuleList(up_blocks)
        self.out_filters = block_expansion

    def forward(self, x):
        new_out = None
        for up_block in self.up_blocks:
            out = x.pop()  # consume encoder features from deepest to shallowest
            if new_out is not None:
                out = torch.cat([out, new_out], dim=1)
            new_out = up_block(out)
        return new_out


class Hourglass(nn.Module):
    def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
        super(Hourglass, self).__init__()
        self.encoder = HourglassEncoder(block_expansion, in_features, num_blocks, max_features)
        self.decoder = HourglassDecoder(block_expansion, in_features, num_blocks, max_features)
        self.out_filters = self.decoder.out_filters

    def forward(self, x):
        return self.decoder(self.encoder(x))
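

# Channel bookkeeping sketch for the hourglass (a hand-check under the defaults
# block_expansion=64, num_blocks=3, max_features=256): the encoder emits feature
# maps with [128, 256, 256] channels; the decoder consumes the deepest map alone
# (256 -> 256), then each concatenated skip (256+256 -> 128, 128+128 -> 64),
# ending at out_filters == block_expansion.
def _check_hourglass_channels():
    hg = Hourglass(block_expansion=64, in_features=6, num_blocks=3, max_features=256)
    x = torch.randn(1, 6, 16, 16)
    assert hg(x).shape == (1, hg.out_filters, 16, 16)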


# class AntiAliasInterpolation2d(nn.Module):
#     """
#     Band-limited downsampling, for better preservation of the input signal.
#     """
#     def __init__(self, channels, scale):
#         super(AntiAliasInterpolation2d, self).__init__()
#         sigma = (1 / scale - 1) / 2
#         kernel_size = 2 * round(sigma * 4) + 1
#         self.ka = kernel_size // 2
#         self.kb = self.ka - 1 if kernel_size % 2 == 0 else self.ka
#         kernel_size = [kernel_size, kernel_size]
#         sigma = [sigma, sigma]
#         # The gaussian kernel is the product of the
#         # gaussian function of each dimension.
#         kernel = 1
#         meshgrids = torch.meshgrid(
#             [
#                 torch.arange(size, dtype=torch.float32)
#                 for size in kernel_size
#             ]
#         )
#         for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
#             mean = (size - 1) / 2
#             kernel *= torch.exp(-(mgrid - mean) ** 2 / (2 * std ** 2))
#         # Make sure sum of values in gaussian kernel equals 1.
#         kernel = kernel / torch.sum(kernel)
#         # Reshape to depthwise convolutional weight
#         kernel = kernel.view(1, 1, *kernel.size())
#         kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1))
#         self.register_buffer('weight', kernel)
#         self.groups = channels
#         self.scale = scale
#     def forward(self, input):
#         if self.scale == 1.0:
#             return input
#         out = F.pad(input, (self.ka, self.kb, self.ka, self.kb))
#         out = F.conv2d(out, weight=self.weight, groups=self.groups)
#         out = F.interpolate(out, scale_factor=(self.scale, self.scale))
#         return out


# class Encoder(nn.Module):
#     def __init__(self, num_channels, num_down_blocks=3, block_expansion=64, max_features=512):
#         super(Encoder, self).__init__()
#         self.in_conv = SameBlock2d(num_channels, block_expansion, kernel_size=(7, 7), padding=(3, 3))
#         down_blocks = []
#         for i in range(num_down_blocks):
#             in_features = min(max_features, block_expansion * (2 ** i))
#             out_features = min(max_features, block_expansion * (2 ** (i + 1)))
#             down_blocks.append(DownBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1)))
#         self.down_blocks = nn.Sequential(*down_blocks)
#     def forward(self, image):
#         out = self.in_conv(image)
#         out = self.down_blocks(out)
#         return out


# class Bottleneck(nn.Module):
#     def __init__(self, num_bottleneck_blocks, num_down_blocks=3, block_expansion=64, max_features=512):
#         super(Bottleneck, self).__init__()
#         bottleneck = []
#         in_features = min(max_features, block_expansion * (2 ** num_down_blocks))
#         for i in range(num_bottleneck_blocks):
#             bottleneck.append(ResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
#         self.bottleneck = nn.Sequential(*bottleneck)
#     def forward(self, feature_map):
#         out = self.bottleneck(feature_map)
#         return out


class Decoder(nn.Module):
    def __init__(self, num_channels, num_down_blocks=3, block_expansion=64, max_features=512):
        super(Decoder, self).__init__()
        up_blocks = []
        for i in range(num_down_blocks):
            in_features = min(max_features, block_expansion * (2 ** (num_down_blocks - i)))
            out_features = min(max_features, block_expansion * (2 ** (num_down_blocks - i - 1)))
            up_blocks.append(UpBlock2d(in_features, out_features, kernel_size=(3, 3), padding=(1, 1)))
        self.up_blocks = nn.Sequential(*up_blocks)
        self.out_conv = nn.Conv2d(block_expansion, num_channels, kernel_size=(7, 7), padding=(3, 3))
        self.sigmoid = nn.Sigmoid()

    def forward(self, feature_map):
        out = self.up_blocks(feature_map)
        out = self.out_conv(out)
        out = self.sigmoid(out)
        return out
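

# A usage sketch for Decoder (an illustration with made-up sizes; note that
# because UpBlock2d does not interpolate, the decoder halves channels per block
# but keeps spatial resolution):
def _check_decoder_shapes():
    dec = Decoder(num_channels=3, num_down_blocks=3, block_expansion=64, max_features=512)
    feature_map = torch.randn(1, 512, 16, 16)  # expects min(max_features, 64 * 2**3) channels
    out = dec(feature_map)
    assert out.shape == (1, 3, 16, 16)  # sigmoid keeps outputs in [0, 1]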


# def warp_image(image, motion_flow):
#     _, h_old, w_old, _ = motion_flow.shape
#     _, _, h, w = image.shape
#     if h_old != h or w_old != w:
#         motion_flow = motion_flow.permute(0, 3, 1, 2)
#         motion_flow = F.interpolate(motion_flow, size=(h, w), mode='bilinear')
#         motion_flow = motion_flow.permute(0, 2, 3, 1)
#     return F.grid_sample(image, motion_flow)


# def make_coordinate_grid(spatial_size, type):
#     h, w = spatial_size
#     x = torch.arange(w).type(type)
#     y = torch.arange(h).type(type)
#     x = (2 * (x / (w - 1)) - 1)
#     y = (2 * (y / (h - 1)) - 1)
#     yy = y.view(-1, 1).repeat(1, w)
#     xx = x.view(1, -1).repeat(h, 1)
#     meshed = torch.cat([xx.unsqueeze_(2), yy.unsqueeze_(2)], 2)
#     return meshed


class ForegroundMatting(nn.Module):
    def __init__(self, num_channels, num_blocks=3, block_expansion=64, max_features=512):
        super(ForegroundMatting, self).__init__()
        # self.down_sample_image = AntiAliasInterpolation2d(num_channels, scale_factor)
        # self.down_sample_flow = AntiAliasInterpolation2d(2, scale_factor)
        self.hourglass = Hourglass(
            block_expansion=block_expansion,
            in_features=num_channels * 2 + 2,  # reference image + warped image + 2-channel dense flow
            max_features=max_features,
            num_blocks=num_blocks
        )
        # self.foreground_mask = nn.Conv2d(self.hourglass.out_filters, 1, kernel_size=(7, 7), padding=(3, 3))
        self.matting_mask = nn.Conv2d(self.hourglass.out_filters, 1, kernel_size=(7, 7), padding=(3, 3))
        self.matting = nn.Conv2d(self.hourglass.out_filters, num_channels, kernel_size=(7, 7), padding=(3, 3))
        # self.scale_factor = scale_factor
        self.sigmoid = nn.Sigmoid()

    def forward(self, reference_image, dense_flow, warped_image):
        '''
        reference_image: b, c, h, w
        dense_flow:      b, 2, h, w
        warped_image:    b, c, h, w
        '''
        # res_out = {}
        # batch, _, h, w = reference_image.shape
        # warped_image = warp_image(reference_image, dense_flow)  # warp the image with dense flow
        # res_out['warped_image'] = warped_image
        hourglass_input = torch.cat([reference_image, dense_flow, warped_image], dim=1)
        hourglass_out = self.hourglass(hourglass_input)
        # foreground_mask = self.foreground_mask(hourglass_out)  # compute foreground mask
        # foreground_mask = self.sigmoid(foreground_mask).permute(0, 2, 3, 1)
        # res_out['foreground_mask'] = foreground_mask
        # grid_flow = make_coordinate_grid((h, w), dense_flow.type())
        # dense_flow_foreground = dense_flow * foreground_mask + (1 - foreground_mask) * grid_flow.unsqueeze(0)  # revise the dense flow
        # res_out['dense_flow_foreground'] = dense_flow_foreground
        # res_out['dense_flow_foreground_vis'] = dense_flow * foreground_mask
        matting_mask = self.matting_mask(hourglass_out)  # compute the matting mask
        matting_mask = self.sigmoid(matting_mask)
        # res_out['matting_mask'] = matting_mask
        matting_image = self.matting(hourglass_out)  # compute the matting image
        # res_out['matting_image'] = matting_image
        # Alpha-composite: keep warped pixels where the mask is high, inpaint elsewhere.
        out = warped_image * matting_mask + matting_image * (1 - matting_mask)
        return out, matting_mask
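

# A minimal end-to-end sketch (an illustration with made-up sizes; in real use
# warped_image would come from warping reference_image with dense_flow, e.g. via
# the commented-out warp_image helper above):
def _check_foreground_matting():
    model = ForegroundMatting(num_channels=3)
    ref = torch.randn(1, 3, 16, 16)
    flow = torch.randn(1, 2, 16, 16)
    warped = torch.randn(1, 3, 16, 16)
    composite, mask = model(ref, flow, warped)
    assert composite.shape == (1, 3, 16, 16)
    assert mask.shape == (1, 1, 16, 16)  # single-channel alpha mask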


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    b, c, h, w = 2, 1280, 40, 40
    m = ForegroundMatting(c).to(device)
    print(m)
    reference_image = torch.randn(b, c, h, w).to(device)
    dense_flow = torch.randn(b, 2, h, w).to(device)
    warped_image = torch.randn(b, c, h, w).to(device)
    out, matting_mask = m(reference_image, dense_flow, warped_image)
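    # Sanity check: the network keeps the input's spatial resolution.
    print(out.shape)           # expected: torch.Size([2, 1280, 40, 40])
    print(matting_mask.shape)  # expected: torch.Size([2, 1, 40, 40])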