import os

# Fetch the FILM reference implementation so its `eval` package can be
# imported below. Skip the clone when the checkout is already present so a
# restart does not re-run (and fail) the clone.
if not os.path.isdir("frame-interpolation"):
    os.system("git clone https://github.com/google-research/frame-interpolation")
import sys
sys.path.append("frame-interpolation")
import math
import cv2
import numpy as np
import tensorflow as tf
import mediapy
from PIL import Image

import gradio as gr

from huggingface_hub import snapshot_download

from image_tools.sizes import resize_and_crop
from pymatting import cutout

model = snapshot_download(repo_id="akhaliq/frame-interpolation-film-style")
from eval import interpolator, util
# NOTE: rebinds the module name `interpolator` to the model instance that
# do_interpolation() uses.
interpolator = interpolator.Interpolator(model, None)

ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)

# Path of the mask image written by the latest even-indexed remove_bg() call;
# the following odd-indexed call reads it back.
fl_ = ""
fl_mask = ""

# Only this many input frames (from the start of the input) are interpolated.
MAX_INPUT_FRAMES = 300


def _read_image(path, *flags):
    """Read an image with cv2.imread as uint8, failing loudly if unreadable.

    cv2.imread returns None instead of raising when a file is missing or
    cannot be decoded; turn that into an explicit error that names the path.
    """
    img = cv2.imread(path, *flags)
    if img is None:
        raise FileNotFoundError(f"could not read image: {path!r}")
    return img.astype(np.uint8)


def do_interpolation(frame1, frame2, interpolation, n):
    """Interpolate between two image files and write the result as a video.

    Args:
        frame1: path of the first image.
        frame2: path of the second image.
        interpolation: value converted to int and passed to
            util.interpolate_recursively_from_files as the recursion count.
        n: index of the first frame; used to build the output file name.

    Returns:
        The name of the written video, "{n}_to_{n+1}_out.mp4" (25 fps).
    """
    print("tween frames: " + str(interpolation))
    print(frame1, frame2)
    input_frames = [frame1, frame2]

    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, int(interpolation), interpolator))

    out_name = f"{n}_to_{n+1}_out.mp4"
    mediapy.write_video(out_name, frames, fps=25)
    return out_name


def get_frames(video_in, step, name, n):
    """Split a video into PNG files in the current working directory.

    Frames are written as "{n}_{name}_{step}{i}.png" where i is the frame
    index.

    Args:
        video_in: path of the video, opened with cv2.VideoCapture.
        step: text placed before the frame index in each file name.
        name: text placed in the middle of each file name.
        n: prefix of each file name.

    Returns:
        A tuple (frames, fps): the list of written file names in order and
        the frame rate reported by OpenCV for the video.
    """
    frames = []
    cap = cv2.VideoCapture(video_in)
    try:
        cframes = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f'frames: {cframes}, fps: {int(fps)}')
        print("video fps: " + str(fps))

        i = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            path = f"{n}_{name}_{step}{i}.png"
            cv2.imwrite(path, frame)
            frames.append(path)
            i += 1
    finally:
        # Release the capture even if reading or writing a frame raised.
        cap.release()
    cv2.destroyAllWindows()
    print("broke the video into frames")
    return frames, fps


def create_video(frames, fps, type):
    """Assemble image files into "{type}_result.mp4".

    Args:
        frames: iterable of image paths, in playback order.
        fps: frame rate handed to mediapy.write_video.
        type: prefix of the output file name.

    Returns:
        The output file name.

    Raises:
        FileNotFoundError: if one of the images cannot be read.
    """
    print("building video result")
    # OpenCV loads BGR; the frames are converted to RGB before being written.
    imgs = [cv2.cvtColor(_read_image(img), cv2.COLOR_BGR2RGB) for img in frames]
    out_name = type + "_result.mp4"
    mediapy.write_video(out_name, imgs, fps=fps)
    return out_name


def infer(f_in, interpolation, fps_output):
    """Interpolate between consecutive input images and build the final video.

    Args:
        f_in: list of image paths in playback order. Only the first
            MAX_INPUT_FRAMES entries are used.
        interpolation: interpolation step count forwarded to
            do_interpolation().
        fps_output: exponent of the output frame rate; the video is written
            at 2 ** fps_output frames per second.

    Returns:
        A tuple with the path of the resulting video twice (once for the
        video player, once for the download component).

    Raises:
        gr.Error: if no input frames were provided.
    """
    fps_output = logscale(fps_output)

    if not f_in:
        raise gr.Error("No input frames: upload at least one image or video first.")

    frames_list = f_in
    if MAX_INPUT_FRAMES >= len(frames_list):
        print("video is shorter than the cut value")
    frames = list(frames_list[:MAX_INPUT_FRAMES])
    n_frame = len(frames)
    print("set stop frames to: " + str(n_frame))

    result_frames = []
    # Pair each kept frame with its successor *within the kept range*, so the
    # sequence always ends on the last kept frame, including when the input
    # was truncated.
    for idx, (frame, next_frame) in enumerate(zip(frames, frames[1:])):
        clip = do_interpolation(frame, next_frame, interpolation, idx)
        tween_paths, _ = get_frames(clip, "interpol", f"{idx}_", -1)
        print(tween_paths)
        # The last frame of the clip is skipped here; the sequence is closed
        # by the final append below.
        for j, img in enumerate(tween_paths[:-1]):
            print(f"IMG:{img}")
            renamed = f"{idx}_to_{idx+1}_{j}.png"
            os.rename(img, renamed)
            result_frames.append(renamed)
        print("frames " + str(idx) + " & " + str(idx+1) + "/" + str(n_frame) + ": done;")

    result_frames.append(f"{frames[-1]}")
    final_vid = create_video(result_frames, fps_output, "interpolated")

    print("interpolated frames: " + str(len(frames_list)) + " -> " + str(len(result_frames)))
    cv2.destroyAllWindows()

    return final_vid, final_vid


def logscale(linear):
    """Return 2 ** linear truncated to int."""
    return int(math.pow(2, linear))


def linscale(linear):
    """Return log2(linear) truncated to int (inverse of logscale)."""
    return int(math.log2(linear))


def remove_bg(fl, count, mh, ms, md, lm, b, d):
    """Process one image of an alternating flash / no-flash sequence.

    Calls are expected in index order: an even `count` writes a mask image
    and remembers its path in the module-level `fl_`; the next odd `count`
    reads that mask back to cut out the foreground of its own image.

    Args:
        fl: path of the image; overwritten in place for odd `count`.
        count: index of the image in the sequence.
        mh, ms, md: upper H, S, V bounds of the difference image that still
            count as background (used as given unless `lm` overrides them).
        lm: "median" or "average" to derive the bounds from the difference
            image instead; any other value keeps mh/ms/md.
        b: radius of the erosion kernel for the trimap.
        d: radius of the dilation kernel for the trimap.

    Returns:
        For even `count`, the path of the written mask; for odd `count`,
        `fl` (now holding the cut-out composited on white).

    Raises:
        FileNotFoundError: if `fl` (or, for odd `count`, the stored mask)
            cannot be read.
    """
    global fl_
    b = int(b)
    d = int(d)
    fr = _read_image(fl)

    # Median blur with a 255 kernel, applied 1 + n times where n grows with
    # the image area in 256x256 units; presumably a background estimate.
    n = int((fr.shape[0]*fr.shape[1]) / (256*256))
    fr_bg = cv2.medianBlur(fr, 255)
    for i in range(0, n):
        fr_bg = cv2.medianBlur(fr_bg, 255)

    # Absolute difference between the image and the blurred version.
    fr_diff = cv2.convertScaleAbs(fr.astype(np.int16)-fr_bg.astype(np.int16)).astype(np.uint8)
    hsv = cv2.cvtColor(fr_diff, cv2.COLOR_BGR2HSV) # range: 180, 255, 255
    fr_diff = cv2.cvtColor(fr_diff, cv2.COLOR_BGR2GRAY)

    if lm == "median":
        mh = np.median(hsv[:,:,0])
        ms = np.median(hsv[:,:,1])
        md = np.median(hsv[:,:,2])
    elif lm == "average":
        mh = np.average(hsv[:,:,0])
        ms = np.average(hsv[:,:,1])
        md = np.average(hsv[:,:,2])

    # Binarize: differences within the bounds become 0, everything else 255.
    bg = cv2.inRange(hsv, np.array([0,0,0]), np.array([mh,ms,md]))
    fr_diff[bg>0] = 0
    fr_diff[bg==0] = 255

    # Draw a 1px white frame, then flood-fill from the corner and clear what
    # the fill reached (the returned mask has a 1px border that is cropped).
    cv2.rectangle(fr_diff,(0,0),(fr_diff.shape[1]-1,fr_diff.shape[0]-1),(255,255,255),1)
    mask = cv2.floodFill(fr_diff, None, (0, 0), 255, 0, 0, (4 | cv2.FLOODFILL_FIXED_RANGE))[2]
    mask = mask[1:mask.shape[0]-1, 1:mask.shape[1]-1]
    fr_diff[mask>0] = 0

    if count % 2: # odd: is photo without the flash
        fr_mask = cv2.cvtColor(_read_image(fl_), cv2.COLOR_BGR2GRAY)
        fr_not = np.bitwise_not(fr_mask)
        fr_shadow = np.bitwise_and(fr_diff, fr_not).astype(np.uint8)
        fr_fg = np.bitwise_or(fr_diff, fr_mask).astype(np.uint8)
        cv2.imwrite(fl_, fr_mask)

        # Clamp near-white and near-black pixels to 239 / 16.
        m = cv2.inRange(fr, np.array([240,240,240]), np.array([255,255,255]))
        fr[m>0] = (239,239,239)
        m = cv2.inRange(fr, np.array([0,0,0]), np.array([15,15,15]))
        fr[m>0] = (16,16,16)
        # Darken pixels flagged only by this image's difference mask.
        fr[fr_shadow>0] = (fr[fr_shadow>0] / 17).astype(np.uint8)

        fr_fg[fr_fg>0] = 3 #probable fg
        mask, bgdModel, fgdModel = cv2.grabCut(fr, fr_fg, None,None,None,65, cv2.GC_INIT_WITH_MASK)
        # Collapse the grabCut labels to binary: 0 and 2 -> 0, others -> 1.
        mask = np.where((mask==2)|(mask==0),0,1).astype('uint8')
        cv2.imwrite(fl, fr)

        # Trimap: eroded mask -> 255, dilated-only band -> 127, rest -> 0.
        element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
        mask_e = cv2.erode(mask, element) * 255
        element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
        mask_d = cv2.dilate(mask, element) * 127
        mask_d[mask_e>0] = 255
        trimap_path = f"{str(count)}_trimask.png"
        cutout_path = f"{str(count)}_cutout.png"
        cv2.imwrite(trimap_path, mask_d.astype(np.uint8))

        cutout(fl, trimap_path, cutout_path)
        a_map = _read_image(cutout_path, cv2.IMREAD_UNCHANGED)
        B, G, R, A = cv2.split(a_map)
        alpha = A / 255
        # Partially transparent pixels are pushed further towards transparent.
        alpha[A<255] = alpha[A<255] / 17
        # Composite the cut-out over a white background.
        R = (255 * (1 - alpha) + R * alpha).astype(np.uint8)
        G = (255 * (1 - alpha) + G * alpha).astype(np.uint8)
        B = (255 * (1 - alpha) + B * alpha).astype(np.uint8)
        fr = cv2.merge((B, G, R))
        cv2.imwrite(fl, fr)

        return fl
    else: # even: with the flash
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. "./examples/x.jpg") do not truncate the name.
        fl_ = os.path.splitext(fl)[0] + "_.png"
        cv2.imwrite(fl_, fr_diff.astype(np.uint8))
        return fl_


def denoise(fl):
    """Denoise the image at `fl` in place and return `fl`.

    Applies cv2.fastNlMeansDenoisingColored followed by a 3x3 median blur.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    fr = _read_image(fl)
    fr = cv2.medianBlur(cv2.fastNlMeansDenoisingColored(fr, None, 5,5,7,21), 3)
    cv2.imwrite(fl, fr)
    return fl


def sharpest(fl, i):
    """Extract the sharpest frame of a video, denoise it and return its path.

    Sharpness is scored as the variance of the Laplacian of the grayscale
    frame. The chosen frame is denoised using up to 12 neighbouring frames
    on each side and written over its extracted PNG.

    Args:
        fl: path of the video.
        i: index of the file; used as prefix of the extracted frame names.

    Returns:
        Path of the PNG holding the denoised sharpest frame.

    Raises:
        ValueError: if no frame could be read from the video.
    """
    frame_paths, _ = get_frames(fl, "vid_input_frame", "origin", i)
    if not frame_paths:
        raise ValueError(f"no frames could be read from video: {fl!r}")

    frames = []
    blur_s = []
    for fr in frame_paths:
        frames.append(_read_image(fr))
        gray = cv2.cvtColor(frames[-1], cv2.COLOR_BGR2GRAY)
        blur_s.append(cv2.Laplacian(gray, cv2.CV_64F).var())
        print(str(int(blur_s[-1])))

    indx = int(np.argmax(blur_s))
    fl = frame_paths[indx]

    # The temporal window must be odd and lie fully inside the frame list, so
    # clamp its half-width against BOTH ends of the clip.
    half = min(25 // 2, indx, len(frames) - 1 - indx)
    n = 2 * half + 1

    #denoise
    frame = cv2.medianBlur(cv2.fastNlMeansDenoisingColoredMulti(
        srcImgs = frames,
        imgToDenoiseIndex = indx,
        temporalWindowSize = n,
        hColor = 5,
        templateWindowSize = 7,
        searchWindowSize = 21), 3)

    cv2.imwrite(fl, frame)
    print(str(i) +'th file, sharpest frame: '+str(indx)+', name: '+fl)
    return fl
def sortFiles(e):
    """Sort key: the last '/'-separated component of path `e`."""
    return e.split('/')[-1]


def loadf(f, r_bg, mh, ms, md, lm, b, d):
    """Pre-process uploaded files and return them for the hidden input and gallery.

    Files are handled in file-name order. An .mp4 is replaced by its
    sharpest frame; any other file is denoised in place. With background
    removal enabled, every file goes through remove_bg() and only the
    results of odd-indexed files are kept.

    Args:
        f: list of uploaded file paths, or None.
        r_bg: whether to run background removal.
        mh, ms, md, lm, b, d: forwarded unchanged to remove_bg().

    Returns:
        A tuple with the list of resulting paths twice. If `f` is None,
        empty, or starts with None, `f` itself is returned twice.
    """
    if not f or f[0] is None:
        return f, f

    fnew = []
    # sorted() leaves the caller's list untouched.
    for i, fl in enumerate(sorted(f, key=sortFiles)):
        # splitext copes with names that have no extension or several dots.
        extension = os.path.splitext(sortFiles(fl))[1].lower()
        if extension == '.mp4':
            fl = sharpest(fl, i)
        else:
            fl = denoise(fl)

        # NOTE(review): this nesting was reconstructed from
        # whitespace-collapsed source — confirm that, with background removal
        # on, only odd-indexed results are meant to be kept.
        if r_bg:
            fl = remove_bg(fl, i, mh, ms, md, lm, b, d)
            if i % 2: # odd: is photo without the flash
                fnew.append(fl)
        else:
            fnew.append(fl)

    return fnew, fnew


# Header HTML shown at the top of the page.
title = """ <div style="text-align: center; max-width: 500px; margin: 0 auto;"> <div style=" display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem; margin-bottom: 10px; " > <h1 style="font-weight: 600; margin-bottom: 7px;"> Video interpolation from images with FILM </h1> </div> <p> This space uses FILM to generate interpolation frames in a set of image files you need to turn into a video for stop motion animation. If .mp4 videos are uploaded instead, selects the sharpest frame of each. Limited to 300 uploaded frames, from the beginning of input.<br /> <a style="display:inline-block" href="https://huggingface.co/spaces/freealise/video_frame_interpolation?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a> </p> </div> """

# NOTE(review): the container nesting below was reconstructed from
# whitespace-collapsed source — confirm the layout renders as intended.
with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML(title)
        with gr.Row():
            with gr.Column():
                with gr.Accordion(label="Upload files here", open=True):
                    files_orig = gr.File(file_count="multiple", file_types=['image', '.mp4'])
                    # Hidden holder for the pre-processed files that infer() consumes.
                    files_input = gr.File(file_count="multiple", visible=False)
                    gallery_input = gr.Gallery(label="Slideshow", preview=True, columns=8192, interactive=False)
                    with gr.Group():
                        r_bg = gr.Checkbox(label="Remove background", value=True)
                        with gr.Accordion(label="Max differences for background", open=False):
                            mh = gr.Slider(minimum=0, maximum=180, step=1, value=180, label="Hue")
                            ms = gr.Slider(minimum=0, maximum=255, step=1, value=255, label="Saturation")
                            md = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Lightness")
                            lm = gr.Radio(label="Use max diffs from", choices=["average", "median", "slider"], value="slider")
                        with gr.Tab("Border"):
                            b_size = gr.Slider(minimum=1, maximum=255, step=2, value=3, label="Inner")
                            d_size = gr.Slider(minimum=1, maximum=255, step=2, value=15, label="Outer")
                    files_orig.upload(fn=loadf, inputs=[files_orig, r_bg, mh, ms, md, lm, b_size, d_size], outputs=[files_input, gallery_input])

                with gr.Row():
                    interpolation_slider = gr.Slider(minimum=1, maximum=5, step=1, value=1, label="Interpolation Steps: ")
                    # Read-only display of 2 ** slider; start it at the value
                    # matching the slider's initial position.
                    interpolation = gr.Number(value=logscale(1), show_label=False, interactive=False)
                    interpolation_slider.change(fn=logscale, inputs=[interpolation_slider], outputs=[interpolation])
                with gr.Row():
                    fps_output_slider = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="FPS output: ")
                    fps_output = gr.Number(value=1, show_label=False, interactive=False)
                    fps_output_slider.change(fn=logscale, inputs=[fps_output_slider], outputs=[fps_output])
                submit_btn = gr.Button("Submit")

            with gr.Column():
                video_output = gr.Video()
                file_output = gr.File()

        gr.Examples(
            examples=[[
                ["./examples/0.png", "./examples/1.png", "./examples/2.png", "./examples/3.png", "./examples/4.png"],
                False, 0, 0, 0, "slider", 1, 1
            ], [
                ["./examples/0_flash.jpg", "./examples/1_noflash.jpg", "./examples/2_flash.jpg", "./examples/3_noflash.jpg"],
                True, 180, 255, 12, "slider", 3, 15
            ]],
            fn=loadf,
            inputs=[files_orig, r_bg, mh, ms, md, lm, b_size, d_size],
            outputs=[files_input, gallery_input],
            cache_examples=True
        )

    # infer() receives the raw slider positions and applies logscale to the
    # fps value itself.
    submit_btn.click(fn=infer, inputs=[files_input, interpolation_slider, fps_output_slider], outputs=[video_output, file_output])

demo.launch()