|
import cv2 |
|
import os |
|
import glob |
|
import shutil |
|
import numpy as np |
|
import math |
|
|
|
|
|
|
|
def mean_pixel_distance(left: np.ndarray, right: np.ndarray) -> float:
    """Return the mean absolute difference in pixel values between `left` and `right`.

    Both `left` and `right` should be 2 dimensional 8-bit images of the same shape.

    Arguments:
        left: first 2D image.
        right: second 2D image, same shape as `left`.

    Returns:
        Sum of per-pixel absolute differences divided by the number of pixels.
    """
    assert len(left.shape) == 2 and len(right.shape) == 2
    assert left.shape == right.shape
    num_pixels: float = float(left.shape[0] * left.shape[1])
    # Widen before subtracting so that uint8 inputs cannot wrap around.
    abs_diff = np.abs(left.astype(np.int32) - right.astype(np.int32))
    # Accumulate explicitly in 64 bits: the default accumulator for an int32
    # array is the platform integer, which is only 32 bits wide on some builds.
    total = np.sum(abs_diff, dtype=np.int64)
    # Divide as NumPy scalars (keeps the original NaN-on-empty behaviour), then
    # convert so the return value really is the annotated built-in float.
    return float(total / num_pixels)
|
|
|
|
|
def estimated_kernel_size(frame_width: int, frame_height: int) -> int:
    """Estimate an odd kernel size from the video resolution.

    The size is 4 plus the rounded value of sqrt(width * height) / 192,
    bumped up by one when that result is even.
    """
    geometric_side = math.sqrt(frame_width * frame_height)
    candidate = 4 + round(geometric_side / 192)
    # Setting the lowest bit turns an even value into the next odd one and
    # leaves an odd value untouched.
    return candidate | 1
|
|
|
_kernel = None


def _detect_edges(lum: np.ndarray) -> np.ndarray:
    """Detect edges using the luma channel of a frame.

    Arguments:
        lum: 2D 8-bit image representing the luma channel of a frame.

    Returns:
        2D 8-bit image of the same size as the input, where pixels with values of 255
        represent edges, and all other pixels are 0.
    """
    global _kernel

    # The dilation kernel depends on the frame resolution. Reuse the cached one
    # only while it still matches; otherwise a kernel built for the first frame
    # ever processed would be applied to frames of a different size.
    kernel_size = estimated_kernel_size(lum.shape[1], lum.shape[0])
    if _kernel is None or _kernel.shape[0] != kernel_size:
        _kernel = np.ones((kernel_size, kernel_size), np.uint8)

    # Canny thresholds are placed one third below and above the median
    # intensity, clamped to the 8-bit range.
    sigma: float = 1.0 / 3.0
    median = np.median(lum)
    low = int(max(0, (1.0 - sigma) * median))
    high = int(min(255, (1.0 + sigma) * median))

    edges = cv2.Canny(lum, low, high)
    # Thicken the detected edges with the resolution-dependent kernel.
    return cv2.dilate(edges, _kernel)
|
|
|
|
|
|
|
def detect_edges(img_path, mask_path, is_invert_mask):
    """Load an image, optionally mask it, and return its dilated edge map.

    Arguments:
        img_path: path of the frame image to read.
        mask_path: path of a mask image, or a falsy value for no masking.
            Only the first channel of the mask is used.
        is_invert_mask: when true, pixels are kept where the mask is 0;
            otherwise pixels are kept where the mask is greater than 0.

    Returns:
        The result of `_detect_edges` applied to the third channel of the
        HSV-converted (masked) image.

    Raises:
        OSError: if the image or the mask cannot be read.
    """
    im = cv2.imread(img_path)
    # cv2.imread reports failure by returning None instead of raising.
    if im is None:
        raise OSError("Failed to read image: {}".format(img_path))

    if mask_path:
        mask = cv2.imread(mask_path)
        if mask is None:
            raise OSError("Failed to read mask: {}".format(mask_path))
        # Keep the first channel with a trailing axis so that it broadcasts
        # over the colour channels of the image.
        mask = mask[:, :, 0][:, :, np.newaxis]
        keep = (mask == 0) if is_invert_mask else (mask > 0)
        im = im * keep

    # Only the third HSV channel is needed for edge detection.
    _, _, lum = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
    return _detect_edges(lum)
|
|
|
def get_mask_path_of_img(img_path, mask_dir):
    """Return the path in `mask_dir` with the same file name as `img_path`.

    Returns None when no regular file exists at that path.
    """
    candidate = os.path.join(mask_dir, os.path.basename(img_path))
    if os.path.isfile(candidate):
        return candidate
    return None
|
|
|
def analyze_key_frames(png_dir, mask_dir, th, min_gap, max_gap, add_last_frame, is_invert_mask):
    """Select keyframe numbers by comparing edge maps of consecutive frames.

    Frames are the files in `png_dir` matching ``[0-9]*.png``, processed in
    sorted path order. The first frame is always a keyframe. Each later frame
    that is at least `min_gap` frames after the current keyframe is compared
    with it; the frame becomes the new keyframe when the mean edge difference
    exceeds ``th * (max_gap - gap) / max_gap``. That threshold shrinks as the
    gap grows and is negative once the gap exceeds `max_gap`, at which point
    the frame is always selected.

    Arguments:
        png_dir: directory containing the frame images.
        mask_dir: directory searched for a same-named mask for each frame.
        th: base threshold for the edge difference.
        min_gap: minimum number of frames between keyframes.
        max_gap: gap at which the threshold reaches zero; values below 1 are
            treated as 1.
        add_last_frame: when true, the last frame is appended if not selected.
        is_invert_mask: passed through to `detect_edges`.

    Returns:
        List of keyframe numbers (file names without extension, as ints).
        Empty when `png_dir` contains no matching frames.
    """
    frames = sorted(glob.glob(os.path.join(png_dir, "[0-9]*.png")))
    if not frames:
        # Nothing to analyse; avoid indexing into an empty list.
        return []

    def frame_number(path):
        # Frame number is the file name without its extension.
        return int(os.path.splitext(os.path.basename(path))[0])

    def edges_of(path):
        # Edge map of a frame, using its mask when one exists.
        return detect_edges(path, get_mask_path_of_img(path, mask_dir), is_invert_mask)

    # Guard the division below against a zero (or negative) maximum gap.
    max_gap = max(1, max_gap)

    keys = [frame_number(frames[0])]
    key_edges = edges_of(frames[0])
    gap = 0

    # Start after the first frame: it is already the current keyframe, and
    # counting it again would make the first interval one frame too long.
    for frame in frames[1:]:
        gap += 1
        if gap < min_gap:
            continue

        edges = edges_of(frame)
        delta = mean_pixel_distance(edges, key_edges)

        # The acceptance threshold decreases linearly with the distance from
        # the current keyframe.
        threshold = th * (max_gap - gap) / max_gap

        if threshold < delta:
            keys.append(frame_number(frame))
            key_edges = edges
            gap = 0

    if add_last_frame:
        last_frame = frame_number(frames[-1])
        if last_frame not in keys:
            keys.append(last_frame)

    return keys
|
|
|
def remove_pngs_in_dir(path):
    """Remove the files matched by ``*.png`` directly inside `path`.

    Does nothing when `path` is not an existing directory.
    """
    if os.path.isdir(path):
        for png_file in glob.glob(os.path.join(path, "*.png")):
            os.remove(png_file)
|
|
|
def ebsynth_utility_stage2(dbg, project_args, key_min_gap, key_max_gap, key_th, key_add_last_frame, is_invert_mask):
    """Run stage 2: pick keyframes and copy them into the keyframe directory.

    Arguments:
        dbg: object whose `print` method receives progress messages.
        project_args: 7-item sequence; items 2-5 are used as the original
            movie path, the frame directory, the frame mask directory and the
            keyframe output directory.
        key_min_gap: minimum keyframe gap expressed at 30 fps, or -1 for the
            default of 10. Other values are clamped to at least 1.
        key_max_gap: maximum keyframe gap expressed at 30 fps, or -1 for the
            default of 300. Other values are clamped to at least 10.
        key_th: threshold passed to `analyze_key_frames`.
        key_add_last_frame: whether the last frame is always a keyframe.
        is_invert_mask: passed through to `analyze_key_frames`.
    """
    dbg.print("stage2")
    dbg.print("")

    _, original_movie_path, frame_path, frame_mask_path, org_key_path, _, _ = project_args

    # Start from an empty keyframe directory.
    remove_pngs_in_dir(org_key_path)
    os.makedirs(org_key_path, exist_ok=True)

    # Fall back to 30 fps unless the movie opens and reports a usable rate.
    # A VideoCapture object is always truthy, so its state has to be queried
    # explicitly; an unopened capture reports an fps of 0.
    fps = 30
    clip = cv2.VideoCapture(original_movie_path)
    try:
        if clip.isOpened():
            detected_fps = clip.get(cv2.CAP_PROP_FPS)
            if detected_fps > 0:
                fps = detected_fps
    finally:
        clip.release()

    # Gaps are given relative to 30 fps; rescale them to the actual rate.
    min_gap_at_30 = 10 if key_min_gap == -1 else max(1, key_min_gap)
    max_gap_at_30 = 300 if key_max_gap == -1 else max(10, key_max_gap)
    key_min_gap = int(min_gap_at_30 * fps / 30)
    key_max_gap = int(max_gap_at_30 * fps / 30)

    # Make sure the smaller value is the minimum.
    if not key_min_gap < key_max_gap:
        key_min_gap, key_max_gap = key_max_gap, key_min_gap

    # Rescaling at a low frame rate can truncate to 0, and the maximum gap is
    # used as a divisor during analysis.
    key_max_gap = max(1, key_max_gap)

    dbg.print("fps: {}".format(fps))
    dbg.print("key_min_gap: {}".format(key_min_gap))
    dbg.print("key_max_gap: {}".format(key_max_gap))
    dbg.print("key_th: {}".format(key_th))

    keys = analyze_key_frames(frame_path, frame_mask_path, key_th, key_min_gap, key_max_gap, key_add_last_frame, is_invert_mask)

    dbg.print("keys : " + str(keys))

    # Frame files are expected to be named with five zero-padded digits.
    for k in keys:
        filename = str(k).zfill(5) + ".png"
        shutil.copy(os.path.join(frame_path, filename), os.path.join(org_key_path, filename))

    dbg.print("")
    dbg.print("Keyframes are output to [" + org_key_path + "]")
    dbg.print("")
    dbg.print("[Ebsynth Utility]->[configuration]->[stage 2]->[Threshold of delta frame edge]")
    dbg.print("The smaller this value, the narrower the keyframe spacing, and if set to 0, the keyframes will be equally spaced at the value of [Minimum keyframe gap].")
    dbg.print("")
    dbg.print("If you do not like the selection, you can modify it manually.")
    dbg.print("(Delete keyframe, or Add keyframe from ["+frame_path+"])")

    dbg.print("")
    dbg.print("completed.")
|
|
|
|