Spaces:

toninio19
/

keysync-demo

Running on Zero

Antoni Bigata

first commit

b5ce381 16 days ago

15.9 kB

	# Copyright (c) OpenMMLab. All rights reserved.

	"""
	Functions taken from https://github.com/DanBigioi/DiffusionVideoEditing


	"""

	import cv2
	import numpy as np
	import torch

	" Countour from 2:15 not good for head poses "


	def face_mask(img_shape, landmark_list, dtype="uint8"):
	    """Build a mask that zeroes the polygon traced by landmarks 2 through 14.

	    Args:
	        img_shape: Image shape; only the first two entries (height, width)
	            are read.
	        landmark_list: Landmark points; the slice ``[2:15]`` is used as the
	            contour (presumably (x, y) pixel coordinates -- confirm with caller).
	        dtype: Data type of the returned mask.

	    Returns:
	        Array of shape (height, width, 1) holding 1 everywhere except inside
	        the filled contour, where it holds 0.
	    """
	    rows, cols = img_shape[:2]
	    canvas = np.ones((rows, cols, 1), dtype=dtype)

	    # Single contour made of the 13 landmark points, wrapped in a batch axis.
	    contour = np.int32([landmark_list[2:15]])
	    cv2.drawContours(canvas, contour, -1, color=(0), thickness=cv2.FILLED)

	    return canvas


	def face_mask_jaw_box(img_shape, landmark_list, dtype="uint8", kernel_size=10):
	    """Return a dilated lower-face mask together with a nose-to-jaw box.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x and column 1
	            as y (presumably a 68-point layout -- confirm with caller).
	        dtype: Data type of the intermediate mask.
	        kernel_size: Side length of the square dilation kernel.

	    Returns:
	        Tuple ``(mask, mask_box)``. ``mask`` has shape (height, width, 1) and
	        is 0 inside the dilated contour, 1 elsewhere. ``mask_box`` lists four
	        int32 corner points in the order left-up, left-down, right-down,
	        right-up.
	    """
	    nose_idx = 33
	    jaw_idx = 8
	    rows, cols = img_shape[:2]

	    # Contour: landmarks 2..14 closed through the nose landmark.
	    outline = np.concatenate((landmark_list[2:15], [landmark_list[nose_idx]]))
	    tight = np.ones((rows, cols, 1), dtype=dtype)
	    cv2.drawContours(
	        tight, [np.int32(outline)], -1, color=(0), thickness=cv2.FILLED
	    )

	    # Grow the zeroed region: flip so the region is 1, dilate, flip back.
	    structuring = np.ones((kernel_size, kernel_size), np.uint8)
	    grown = cv2.dilate(1 - tight, structuring, iterations=1)
	    # The dilated result is given a trailing singleton channel axis again.
	    mask = 1 - np.expand_dims(grown, axis=-1)

	    # Box: horizontal extent from the extreme landmarks, top at the nose,
	    # bottom 20 px under the jaw landmark but never past the image height.
	    xs = landmark_list[:, 0]
	    left_x = landmark_list[int(np.argmin(xs))][0]
	    right_x = landmark_list[int(np.argmax(xs))][0]
	    top_y = landmark_list[nose_idx][1]
	    bottom_y = min(landmark_list[jaw_idx, 1] + 20, rows)

	    mask_box = [
	        np.int32([left_x, top_y]),
	        np.int32([left_x, bottom_y]),
	        np.int32([right_x, bottom_y]),
	        np.int32([right_x, top_y]),
	    ]

	    return mask, mask_box


	" Stretch the tight face mask - Countour from 2:15 but dilate, not good for extreme head poses "


	def face_mask_stretch(img_shape, landmark_list, dtype="uint8", kernel_size=10):
	    """Build the landmark-contour mask and enlarge its zeroed area by dilation.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: Landmark points; ``[2:15]`` plus index 33 form the
	            contour (presumably a 68-point layout -- confirm with caller).
	        dtype: Data type of the intermediate mask.
	        kernel_size: Side length of the square dilation kernel.

	    Returns:
	        Array of shape (height, width, 1): 0 inside the dilated contour and
	        1 elsewhere.
	    """
	    rows, cols = img_shape[:2]

	    # Contour: landmarks 2..14 closed through landmark 33.
	    outline = np.concatenate((landmark_list[2:15], [landmark_list[33]]))
	    tight = np.ones((rows, cols, 1), dtype=dtype)
	    cv2.drawContours(
	        tight, [np.int32(outline)], -1, color=(0), thickness=cv2.FILLED
	    )

	    # Dilation grows regions of 1, so flip, dilate, and flip back.
	    structuring = np.ones((kernel_size, kernel_size), np.uint8)
	    grown = cv2.dilate(1 - tight, structuring, iterations=1)
	    # The dilated result is given a trailing singleton channel axis again.
	    grown = np.expand_dims(grown, axis=-1)

	    return 1 - grown


	" Small box around mouth - Use far left, far right points for extreme head poses, cut between nose and upper mouth point"


	def face_mask_box_pose(img_shape, landmark_list, dtype="uint8"):
	    """Zero a mouth box that stays usable for non-frontal head poses.

	    The top edge sits halfway between the nose landmark (33) and the upper-lip
	    landmark (51); the sides use the smallest and largest landmark x values;
	    the bottom edge is 20 px under the jaw landmark (8), capped at the image
	    height.

	    TODO: Maybe it is better to add some more pixels to have a bigger mask,
	    especially on large head poses.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x, column 1 as y.
	        dtype: Data type of the returned mask.

	    Returns:
	        Array of shape (height, width, 1): 0 inside the box, 1 elsewhere.
	    """
	    nose_idx, lip_idx, jaw_idx = 33, 51, 8
	    rows, cols = img_shape[:2]

	    # Vertical extent of the box.
	    nose_y = landmark_list[nose_idx, 1]
	    lip_y = landmark_list[lip_idx, 1]
	    top_y = (lip_y - nose_y) / 2 + nose_y
	    bottom_y = min(landmark_list[jaw_idx, 1] + 20, rows)

	    # Horizontal extent from the extreme landmarks.
	    xs = landmark_list[:, 0]
	    left_x = landmark_list[int(np.argmin(xs))][0]
	    right_x = landmark_list[int(np.argmax(xs))][0]

	    top_left = np.int32([left_x, top_y])
	    top_right = np.int32([right_x, top_y])
	    bottom_left = np.int32([left_x, bottom_y])
	    bottom_right = np.int32([right_x, bottom_y])

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole rectangle -- confirm.
	    outline = np.int32(
	        [
	            [
	                top_left,
	                bottom_left,
	                top_right,
	                bottom_right,
	                top_left,
	                top_right,
	                bottom_left,
	                bottom_right,
	            ]
	        ]
	    )

	    canvas = np.ones((rows, cols, 1), dtype=dtype)
	    cv2.drawContours(canvas, outline, -1, color=(0), thickness=cv2.FILLED)

	    return canvas


	" Small box around mouth - Use far left, far right points for extreme head poses, cut from nose"


	def face_mask_box_pose_nose(
	    img_shape,
	    landmark_list,
	    dtype="uint8",
	    get_box=False,
	    pixels_above_nose=None,
	    pixels_under_jaw=None,
	):
	    """Zero a box from the nose landmark down to just below the jaw.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x, column 1 as y.
	        dtype: Data type of the returned mask.
	        get_box: When true, also return the four box corners.
	        pixels_above_nose: Optional amount by which the top edge is raised
	            above the nose landmark (used at inference to get a bigger mask
	            that is blended back into the original frame).
	        pixels_under_jaw: Optional margin under the jaw landmark; 20 is used
	            when it is None.

	    Returns:
	        The mask of shape (height, width, 1) with 0 inside the box, or the
	        tuple ``(mask, mask_box)`` when ``get_box`` is true, where
	        ``mask_box`` is [left-up, left-down, right-down, right-up].
	    """
	    nose_idx = 33
	    jaw_idx = 8
	    rows, cols = img_shape[:2]

	    # Top edge: nose height, optionally lifted.
	    nose_y = landmark_list[nose_idx][1]
	    top_y = nose_y if pixels_above_nose is None else nose_y - pixels_above_nose

	    # Bottom edge: jaw plus a margin, never beyond the image height.
	    jaw_margin = 20 if pixels_under_jaw is None else pixels_under_jaw
	    bottom_y = min(landmark_list[jaw_idx, 1] + jaw_margin, rows)

	    # Sides: extreme landmark x positions.
	    xs = landmark_list[:, 0]
	    left_x = landmark_list[int(np.argmin(xs))][0]
	    right_x = landmark_list[int(np.argmax(xs))][0]

	    top_left = np.int32([left_x, top_y])
	    top_right = np.int32([right_x, top_y])
	    bottom_left = np.int32([left_x, bottom_y])
	    bottom_right = np.int32([right_x, bottom_y])

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole rectangle -- confirm.
	    outline = np.int32(
	        [
	            [
	                top_left,
	                bottom_left,
	                top_right,
	                bottom_right,
	                top_left,
	                top_right,
	                bottom_left,
	                bottom_right,
	            ]
	        ]
	    )

	    canvas = np.ones((rows, cols, 1), dtype=dtype)
	    cv2.drawContours(canvas, outline, -1, color=(0), thickness=cv2.FILLED)

	    if not get_box:
	        return canvas

	    return canvas, [top_left, bottom_left, bottom_right, top_right]


	def face_mask_box_pose_big(
	    img_shape, landmark_list, dtype="uint8", cut_h=None, far_left=None, far_right=None
	):
	    """Zero a box that runs from ``cut_h`` to the bottom of the image.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x, column 1 as y.
	        dtype: Data type of the returned mask.
	        cut_h: y coordinate of the top edge. Defaults to the y of the nose
	            landmark (index 33).
	        far_left: x coordinate of the left edge. Defaults to the x of the
	            left-most landmark.
	        far_right: x coordinate of the right edge. Defaults to the x of the
	            right-most landmark.

	    Returns:
	        Array of shape (height, width, 1): 0 inside the box, 1 elsewhere.
	    """
	    height, width = img_shape[:2]
	    mask = np.ones((height, width, 1), dtype=dtype)

	    if cut_h is None:
	        nose = 33
	        cut_h = landmark_list[nose, 1]

	    # Resolve each side on its own. Previously the landmark fallback was only
	    # used when BOTH sides were None, so passing a single side sent None into
	    # np.int32 and raised a TypeError.
	    if far_left is None:
	        left_x = landmark_list[int(np.argmin(landmark_list[:, 0]))][0]
	    else:
	        left_x = far_left
	    if far_right is None:
	        right_x = landmark_list[int(np.argmax(landmark_list[:, 0]))][0]
	    else:
	        right_x = far_right

	    left_up_point = np.int32([left_x, cut_h])
	    left_down_point = np.int32([left_x, height])
	    right_up_point = np.int32([right_x, cut_h])
	    right_down_point = np.int32([right_x, height])

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole rectangle -- confirm.
	    cv2.drawContours(
	        mask,
	        np.int32(
	            [
	                [
	                    left_up_point,
	                    left_down_point,
	                    right_up_point,
	                    right_down_point,
	                    left_up_point,
	                    right_up_point,
	                    left_down_point,
	                    right_down_point,
	                ]
	            ]
	        ),
	        -1,
	        color=(0),
	        thickness=cv2.FILLED,
	    )

	    return mask


	def face_mask_box_pose_big_cover_nose(img_shape, landmark_list, dtype="uint8"):
	    """Zero a box from the middle-nose landmark to the bottom of the image.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x, column 1 as y.
	        dtype: Data type of the returned mask.

	    Returns:
	        Array of shape (height, width, 1): 0 inside the box, 1 elsewhere.
	    """
	    mid_nose_idx = 29
	    rows, cols = img_shape[:2]

	    # Top edge at the middle-nose landmark; sides at the extreme landmarks.
	    top_y = landmark_list[mid_nose_idx, 1]
	    xs = landmark_list[:, 0]
	    left_x = landmark_list[int(np.argmin(xs))][0]
	    right_x = landmark_list[int(np.argmax(xs))][0]

	    top_left = np.int32([left_x, top_y])
	    bottom_left = np.int32([left_x, rows])
	    top_right = np.int32([right_x, top_y])
	    bottom_right = np.int32([right_x, rows])

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole rectangle -- confirm.
	    outline = np.int32(
	        [
	            [
	                top_left,
	                bottom_left,
	                top_right,
	                bottom_right,
	                top_left,
	                top_right,
	                bottom_left,
	                bottom_right,
	            ]
	        ]
	    )

	    canvas = np.ones((rows, cols, 1), dtype=dtype)
	    cv2.drawContours(canvas, outline, -1, color=(0), thickness=cv2.FILLED)

	    return canvas


	def face_mask_square(img_shape, landmark_list, dtype="uint8"):
	    """Zero the region under landmarks 2 and 14 down to the image bottom.

	    The top edge joins landmark 2 to landmark 14; the sides drop vertically
	    from each of them to ``height``.

	    The earlier version also printed debugging values on every call, one of
	    which ("far_right") was taken from the y column instead of the x column.
	    Those diagnostics did not affect the mask and have been removed.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: Landmark points; each entry is used as an (x, y) pair.
	        dtype: Data type of the returned mask.

	    Returns:
	        Array of shape (height, width, 1): 0 inside the region, 1 elsewhere.
	    """
	    height, width = img_shape[:2]
	    mask = np.ones((height, width, 1), dtype=dtype)

	    left_p = 2
	    right_p = 14

	    left_top = landmark_list[left_p]
	    right_top = landmark_list[right_p]
	    left_bottom = [landmark_list[left_p][0], height]
	    right_bottom = [landmark_list[right_p][0], height]

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole region -- confirm.
	    cv2.drawContours(
	        mask,
	        np.int32(
	            [
	                [
	                    left_top,
	                    left_bottom,
	                    right_top,
	                    right_bottom,
	                    left_top,
	                    right_top,
	                    left_bottom,
	                    right_bottom,
	                ]
	            ]
	        ),
	        -1,
	        color=(0),
	        thickness=cv2.FILLED,
	    )

	    return mask


	" Used for half face "


	def bbox2mask(img_shape, bbox, dtype="uint8"):
	    """Create an all-ones mask with the bounding-box region set to zero.

	    Note that the box is the part that ends up as 0; every other pixel is 1.

	    Args:
	        img_shape (tuple[int]): Image shape; only (height, width) are read.
	        bbox (tuple[int]): Box given as (top, left, height, width).
	        dtype (str): Data type of the returned mask. Default: 'uint8'.

	    Return:
	        numpy.ndarray: Mask of shape (h, w, 1).
	    """
	    rows, cols = img_shape[:2]

	    top = bbox[0]
	    left = bbox[1]
	    row_span = slice(top, top + bbox[2])
	    col_span = slice(left, left + bbox[3])

	    canvas = np.ones((rows, cols, 1), dtype=dtype)
	    canvas[row_span, col_span, :] = 0.0

	    return canvas


	def face_mask_cheeks(img_shape, landmark_list, dtype="uint8"):
	    """Zero the lower face below the middle of the nose but keep the nose visible.

	    A box from the middle-nose landmark (29) down to the image bottom is
	    zeroed, then a rectangle around the nose (between landmarks 29 and 33
	    vertically, landmarks 31 and 35 horizontally plus a margin) is set back
	    to 1.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: 2-D landmark array; column 0 is used as x, column 1 as y
	            (presumably a 68-point layout -- confirm with caller).
	        dtype: Data type of the returned mask.

	    Returns:
	        Array of shape (height, width, 1) containing only 0 and 1: 0 in the
	        box outside the nose rectangle, 1 everywhere else.
	    """
	    height, width = img_shape[:2]
	    mask = np.ones((height, width, 1), dtype=dtype)

	    middle_nose_point = 29
	    nose = 33
	    cut_h = int(landmark_list[middle_nose_point, 1])

	    far_left = int(np.argmin(landmark_list[:, 0]))
	    far_right = int(np.argmax(landmark_list[:, 0]))

	    left_up_point = np.int32([landmark_list[far_left][0], cut_h])
	    left_down_point = np.int32([landmark_list[far_left][0], height])

	    right_up_point = np.int32([landmark_list[far_right][0], cut_h])
	    right_down_point = np.int32([landmark_list[far_right][0], height])

	    # NOTE(review): the contour walks the corners in a self-crossing order,
	    # twice; presumably so the FILLED draw covers the whole rectangle -- confirm.
	    cv2.drawContours(
	        mask,
	        np.int32(
	            [
	                [
	                    left_up_point,
	                    left_down_point,
	                    right_up_point,
	                    right_down_point,
	                    left_up_point,
	                    right_up_point,
	                    left_down_point,
	                    right_down_point,
	                ]
	            ]
	        ),
	        -1,
	        color=(0),
	        thickness=cv2.FILLED,
	    )

	    # Horizontal margin around the nose: 10% of the x distance between
	    # landmark 2 and the middle-nose landmark.
	    nose_jaw_dist = (
	        abs(landmark_list[2][0] - landmark_list[middle_nose_point][0]) * 0.10
	    )

	    nose_min_x = int(landmark_list[31][0] - nose_jaw_dist)
	    nose_max_x = int(landmark_list[35][0] + nose_jaw_dist)
	    nose_min_y = cut_h
	    nose_max_y = int(landmark_list[nose, 1])

	    # Draw the nose rectangle as 0 on a ones canvas, then flip it so the
	    # rectangle is 1 and the rest is 0.
	    mask_nose = np.ones((height, width, 1), dtype=dtype)
	    cv2.rectangle(
	        mask_nose,
	        (nose_min_x, nose_min_y),
	        (nose_max_x, nose_max_y),
	        color=(0),
	        thickness=cv2.FILLED,
	    )
	    mask_nose = 1 - mask_nose

	    # Merge with an element-wise maximum rather than addition: where the nose
	    # rectangle reaches outside the zeroed box the mask is already 1, and
	    # adding would produce 2 and break the binary mask.
	    mask = np.maximum(mask, mask_nose)

	    return mask


	def face_mask_cheeks_batch(
	    img_shape, landmark_list, dtype="uint8", box_expand=0.0, show_nose=True
	):
	    """Compute lower-face masks (optionally keeping the nose) for one or many frames.

	    For every frame a box from the middle-nose landmark (29) down to the image
	    bottom is zeroed; with ``show_nose`` a rectangle around the nose is set
	    back to 1.

	    Args:
	        img_shape: Image shape; only (height, width) are read.
	        landmark_list: Landmarks for one frame (2-D) or several frames (3-D,
	            frames on the first axis); column 0 is used as x, column 1 as y.
	        dtype: Data type of the numpy masks.
	        box_expand: When > 0, the top edge is raised by ``box_expand * height``
	            pixels (not above row 0).
	        show_nose: Whether to restore the nose rectangle.

	    Returns:
	        If there is exactly one frame: a numpy array of shape (height, width)
	        with 0 in the masked region and 1 elsewhere. Otherwise: a torch tensor
	        of shape (num_frames, height, width) with the opposite polarity
	        (``1 - masks``). This asymmetry is kept as callers may rely on it.
	    """
	    height, width = img_shape[:2]

	    # Handle both single and multiple landmarks
	    if len(landmark_list.shape) == 2:
	        landmark_list = landmark_list[None, ...]  # Add batch dimension
	    num_frames = landmark_list.shape[0]

	    # Initialize masks for all frames
	    masks = np.ones((num_frames, height, width), dtype=dtype)

	    # Landmark indices and the top-edge lift do not change between frames.
	    middle_nose_point = 29
	    nose = 33
	    expand_px = int(box_expand * height) if box_expand > 0 else 0

	    for i in range(num_frames):
	        landmarks = landmark_list[i]
	        cut_h = int(landmarks[middle_nose_point, 1])

	        # Add height expansion
	        if box_expand > 0:
	            cut_h = max(0, cut_h - expand_px)

	        far_left = int(np.argmin(landmarks[:, 0]))
	        far_right = int(np.argmax(landmarks[:, 0]))

	        left_up_point = np.int32([landmarks[far_left][0], cut_h])
	        left_down_point = np.int32([landmarks[far_left][0], height])

	        right_up_point = np.int32([landmarks[far_right][0], cut_h])
	        right_down_point = np.int32([landmarks[far_right][0], height])

	        # NOTE(review): the contour walks the corners in a self-crossing
	        # order, twice; presumably so the FILLED draw covers the whole
	        # rectangle -- confirm.
	        cv2.drawContours(
	            masks[i],
	            np.int32(
	                [
	                    [
	                        left_up_point,
	                        left_down_point,
	                        right_up_point,
	                        right_down_point,
	                        left_up_point,
	                        right_up_point,
	                        left_down_point,
	                        right_down_point,
	                    ]
	                ]
	            ),
	            -1,
	            color=(0),
	            thickness=cv2.FILLED,
	        )

	        if show_nose:
	            # Horizontal margin: 10% of the x distance between landmark 2
	            # and the middle-nose landmark.
	            nose_jaw_dist = (
	                abs(landmarks[2][0] - landmarks[middle_nose_point][0]) * 0.10
	            )

	            nose_min_x = int(landmarks[31][0] - nose_jaw_dist)
	            nose_max_x = int(landmarks[35][0] + nose_jaw_dist)
	            nose_min_y = cut_h
	            nose_max_y = int(landmarks[nose, 1])

	            # Draw the nose rectangle as 0 on a ones canvas, then flip it so
	            # the rectangle is 1 and the rest is 0.
	            mask_nose = np.ones((height, width), dtype=dtype)
	            cv2.rectangle(
	                mask_nose,
	                (nose_min_x, nose_min_y),
	                (nose_max_x, nose_max_y),
	                color=(0),
	                thickness=cv2.FILLED,
	            )
	            mask_nose = 1 - mask_nose

	            # Element-wise maximum instead of addition: where the rectangle
	            # leaves the zeroed box the mask is already 1, and a sum of 2
	            # would wrap around in the unsigned `1 - masks` below.
	            masks[i] = np.maximum(masks[i], mask_nose)

	    # If input was single frame, return single mask
	    if num_frames == 1:
	        return masks[0]

	    return 1 - torch.from_numpy(masks)