Spaces:

DragGan
/

DragGan-Inversion

Runtime error

App Files Files Community

DragGan-Inversion / stylegan_human /alignment.py

radames

first

bb0f5a9 over 1 year ago

raw

history blame

9.44 kB

	# Copyright (c) SenseTime Research. All rights reserved.


	import os
	import argparse
	import numpy as np
	import torch
	from torch.utils.data import DataLoader
	from torchvision.transforms import transforms
	from utils.ImagesDataset import ImagesDataset

	import cv2
	import time
	import copy
	import imutils

	# for openpose body keypoint detector : # (src:https://github.com/Hzzone/pytorch-openpose)
	from openpose.src import util
	from openpose.src.body import Body

	# for paddlepaddle human segmentation : #(src: https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/contrib/PP-HumanSeg/)
	from PP_HumanSeg.deploy.infer import Predictor as PP_HumenSeg_Predictor

	import math


	def angle_between_points(p0, p1, p2):
	    """Return the angle at vertex ``p1`` formed by ``p0`` and ``p2``, in degrees.

	    The angle is computed with the law of cosines on the squared side
	    lengths of the triangle (p0, p1, p2).

	    Args:
	        p0, p1, p2: indexable points; index 0 is x and index 1 is y.
	            A y value of -1 marks a point as missing.

	    Returns:
	        The angle in degrees in [0, 180], or -1 when any point is missing
	        or when ``p1`` coincides with ``p0`` or ``p2`` (angle undefined).
	    """
	    if p0[1] == -1 or p1[1] == -1 or p2[1] == -1:
	        return -1
	    # Squared lengths of the sides p0-p1, p1-p2 and p0-p2.
	    a = (p1[0] - p0[0]) ** 2 + (p1[1] - p0[1]) ** 2
	    b = (p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2
	    c = (p2[0] - p0[0]) ** 2 + (p2[1] - p0[1]) ** 2
	    if a * b == 0:
	        return -1
	    # cos(angle) = (a + b - c) / (2 * sqrt(a * b)) == (a + b - c) / sqrt(4ab)
	    cos_angle = (a + b - c) / math.sqrt(4 * a * b)
	    # Floating-point rounding can push the ratio marginally outside
	    # [-1, 1] for (nearly) collinear points, which would make acos raise.
	    cos_angle = max(-1.0, min(1.0, cos_angle))
	    return math.acos(cos_angle) * 180 / math.pi


	def crop_img_with_padding(img, keypoints, rect):
	    """Crop the person region out of ``img`` and resize it to 512x1024.

	    The crop window is first made horizontally symmetric about a body
	    centre x derived from keypoints 1, 8 and 11, then grown to a 1:2
	    (width:height) aspect ratio. Any part of the grown window that falls
	    outside the image is filled by replicating the border pixels.

	    Args:
	        img: image array whose shape unpacks as (height, width, channels).
	        keypoints: indexable of points; the x value (index 0) of entries
	            1, 8 and 11 is read. Presumably OpenPose body keypoints
	            (neck / hips) -- confirm against the caller.
	        rect: ``(person_xmin, person_xmax, ymin, ymax)`` bounds of the person.

	    Returns:
	        The padded crop resized with ``cv2.resize`` to ``(512, 1024)``
	        (width, height).
	    """
	    person_xmin, person_xmax, ymin, ymax = rect
	    img_h, img_w, _ = img.shape

	    # Body centre: x from the shoulder/hip keypoints, y from the rect.
	    shoulder_x = keypoints[1][0]
	    hip_x = (keypoints[8][0] + keypoints[11][0]) // 2
	    mid_x = (hip_x + shoulder_x) // 2
	    mid_y = (ymin + ymax) // 2

	    # Mirror the side that reaches further from the centre onto the other
	    # side so the window is symmetric about mid_x.
	    if abs(mid_x - person_xmin) > abs(person_xmax - mid_x):
	        xmin, xmax = person_xmin, mid_x + (mid_x - person_xmin)
	    else:
	        # xmin may become negative here; that case is intentionally left
	        # unhandled (no usable image is produced) because the body itself
	        # should not be padded.
	        xmin, xmax = mid_x - (person_xmax - mid_x), person_xmax

	    w = xmax - xmin
	    h = ymax - ymin

	    pad_up = pad_down = pad_left = pad_right = 0
	    if h / w >= 2:
	        # Too tall for 1:2 -> widen the window around mid_x.
	        target_w = h // 2
	        xmin_prime = int(mid_x - target_w / 2)
	        xmax_prime = int(mid_x + target_w / 2)
	        if xmin_prime < 0:
	            pad_left, xmin = abs(xmin_prime), 0
	        else:
	            xmin = xmin_prime
	        if xmax_prime > img_w:
	            pad_right, xmax = xmax_prime - img_w, img_w
	        else:
	            xmax = xmax_prime
	    else:
	        # Too wide for 1:2 -> extend the window vertically around mid_y.
	        target_h = w * 2
	        ymin_prime = mid_y - (target_h / 2)
	        ymax_prime = mid_y + (target_h / 2)
	        if ymin_prime < 0:
	            pad_up, ymin = abs(ymin_prime), 0
	        else:
	            ymin = ymin_prime
	        if ymax_prime > img_h:
	            pad_down, ymax = ymax_prime - img_h, img_h
	        else:
	            ymax = ymax_prime
	        print(ymin, ymax, xmin, xmax, img.shape)

	    # Crop the in-image part of the window, then replicate edge pixels to
	    # make up for whatever part of the window lay outside the image.
	    cropped_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
	    im_pad = cv2.copyMakeBorder(
	        cropped_img,
	        int(pad_up), int(pad_down), int(pad_left), int(pad_right),
	        cv2.BORDER_REPLICATE)
	    return cv2.resize(im_pad, (512, 1024), interpolation=cv2.INTER_AREA)


	def run(args):
	    """Align every image in ``args.image_folder`` and save the results.

	    For each image the pipeline is:
	      1. segment the person with the PP-HumanSeg predictor,
	      2. take the largest external contour of the mask and pad the
	         composited image at the top and bottom,
	      3. detect body keypoints with the OpenPose ``Body`` model,
	      4. crop/pad with ``crop_img_with_padding`` and write the result to
	         ``<output_folder>/<fname>.png``.

	    An image that cannot be aligned (no person mask, body touching the
	    image boundary, padded box out of range, or not exactly one confident
	    person) is skipped and processing continues with the next image.

	    Args:
	        args: namespace providing ``image_folder`` and ``output_folder``.
	    """
	    from tqdm import tqdm

	    os.makedirs(args.output_folder, exist_ok=True)
	    dataset = ImagesDataset(
	        args.image_folder, transforms.Compose([transforms.ToTensor()]))
	    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

	    body_estimation = Body('openpose/model/body_pose_model.pth')

	    total = len(dataloader)
	    print('Num of dataloader : ', total)

	    # Initialise the HumanSeg predictor; it takes an argparse-style namespace.
	    human_seg_args = argparse.Namespace(
	        cfg='PP_HumanSeg/export_model/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax/deploy.yaml',
	        input_shape=[1024, 512],
	        save_dir=args.output_folder,
	        soft_predict=False,
	        use_gpu=True,
	        test_speed=False,
	        use_optic_flow=False,
	        add_argmax=True,
	    )
	    human_seg = PP_HumenSeg_Predictor(human_seg_args)

	    for fname, image in tqdm(dataloader):
	        fname = fname[0]  # batch_size is 1
	        print(f'Processing \'{fname}\'.')

	        # Tensor (1, C, H, W) scaled by 255 -> numpy image (H, W, C).
	        image = (image.permute(0, 2, 3, 1) * 255).clamp(0, 255)
	        image = image.squeeze(0).numpy()
	        # Avoid super high-res input: scale anything >= 2000 px tall down
	        # to a height of 1200 px, keeping the aspect ratio.
	        if image.shape[0] >= 2000:
	            ratio = image.shape[0] / 1200
	            dim = (int(image.shape[1] / ratio), 1200)  # (width, height)
	            image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
	        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

	        # Segmentation. NOTE(review): per the original comments
	        # `segmentation` is in [0, 1] and `comb` in [0, 255] -- confirm
	        # against the predictor implementation.
	        comb, segmentation, _bg, _ori_img = human_seg.run(image, None)

	        masks_np = segmentation * 255
	        mask0_np = masks_np[:, :, 0].astype(np.uint8)
	        contours = cv2.findContours(
	            mask0_np, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        cnts = imutils.grab_contours(contours)
	        if len(cnts) == 0:
	            # max() below would raise ValueError on an empty sequence.
	            print(f'Processing \'{fname}\'. No person found in segmentation mask, skipped.')
	            continue
	        c = max(cnts, key=cv2.contourArea)

	        # Topmost / bottommost y of the largest contour.
	        top_y = int(c[c[:, :, 1].argmin()][0][1])
	        bot_y = int(c[c[:, :, 1].argmax()][0][1])
	        pad_range = int((bot_y - top_y) * 0.05)
	        border = pad_range + 5  # +5: give some extra space
	        comb_h = comb.shape[0]

	        if 0 < top_y <= 5 and comb_h - 5 <= bot_y < comb_h:
	            # Segmentation mask already reaches the edge: pad with pure white.
	            comb = cv2.copyMakeBorder(
	                comb, border, border, 0, 0, cv2.BORDER_CONSTANT, value=[255, 255, 255])
	        elif top_y <= 0 or bot_y >= comb_h:
	            print('PAD: body out of boundary', fname)  # should not happen
	            # Skip this image only; returning here would abort the batch.
	            continue
	        else:
	            comb = cv2.copyMakeBorder(
	                comb, border, border, 0, 0, cv2.BORDER_REPLICATE)
	        # Both padding branches add `border` rows on top, so shift the
	        # contour extremes into the padded image's coordinates.
	        top_y += border
	        bot_y += border

	        person_ymin = top_y - pad_range
	        person_ymax = bot_y + pad_range
	        if person_ymin < 0 or person_ymax > comb.shape[0]:
	            print('PAD: person box out of range', fname)
	            continue
	        person_xmin = int(c[c[:, :, 0].argmin()][0][0])
	        person_xmax = int(c[c[:, :, 0].argmax()][0][0])
	        rect = [person_xmin, person_xmax, person_ymin, person_ymax]

	        # Detect keypoints; require exactly one detection whose last subset
	        # field is at least 15. NOTE(review): presumably that field is the
	        # number of detected body parts -- confirm against openpose.src.body.
	        keypoints, subset = body_estimation(comb)
	        if len(subset) != 1 or subset[0][-1] < 15:
	            print(
	                f'Processing \'{fname}\'. Please import image contains one person only. Also can check segmentation mask. ')
	            continue

	        comb = crop_img_with_padding(comb, keypoints, rect)

	        cv2.imwrite(f'{args.output_folder}/{fname}.png', comb)
	        print(f' -- Finished processing \'{fname}\'. --')


	if __name__ == '__main__':
	    # Enable cuDNN benchmark mode and do not force deterministic kernels.
	    torch.backends.cudnn.benchmark = True
	    torch.backends.cudnn.deterministic = False

	    # The timer deliberately starts before argument parsing so the
	    # reported figure covers the whole script run.
	    started_at = time.time()

	    cli = argparse.ArgumentParser(
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	        description='StyleGAN-Human data process',
	    )
	    cli.add_argument('--image-folder', type=str, dest='image_folder')
	    cli.add_argument(
	        '--output-folder', dest='output_folder', default='results', type=str)
	    # parser.add_argument('--cfg', dest='cfg for segmentation', default='PP_HumanSeg/export_model/ppseg_lite_portrait_398x224_with_softmax/deploy.yaml', type=str)

	    print('parsing arguments')
	    run(cli.parse_args())

	    elapsed = time.time() - started_at
	    print('total time elapsed: ', str(elapsed))