# Repo source: https://github.com/vt-vl-lab/3d-photo-inpainting

import subprocess

# Log GPU status at startup (harmless no-op on CPU-only machines).
subprocess.run('nvidia-smi', shell=True)

# vispy needs a display to render into; start a virtual one for headless servers.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1920, 1080)).start()

# 3d inpainting imports
import numpy as np
import argparse
import glob
import os
from functools import partial
import vispy
import scipy.misc as misc
from tqdm import tqdm
import yaml
import time
import sys
from mesh import write_ply, read_ply, output_3d_photo
from utils import get_MiDaS_samples, read_MiDaS_depth
import torch
import cv2
from skimage.transform import resize
import imageio
import copy
from networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from MiDaS.run import run_depth
from boostmonodepth_utils import run_boostmonodepth
from MiDaS.monodepth_net import MonoDepthNet
import MiDaS.MiDaS_utils as MiDaS_utils
from bilateral_filtering import sparse_bilateral_filtering

# gradio imports
import gradio as gr
import uuid
from PIL import Image
from pathlib import Path
import shutil
from time import sleep


def inpaint(img_name, num_frames, fps):
    # yaml.load() without an explicit Loader errors out on PyYAML >= 6.
    with open('argument.yml', 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)
    config['num_frames'] = num_frames
    config['fps'] = fps
    if torch.cuda.is_available():
        config['gpu_ids'] = 0
    if config['offscreen_rendering'] is True:
        vispy.use(app='egl')
    os.makedirs(config['mesh_folder'], exist_ok=True)
    os.makedirs(config['video_folder'], exist_ok=True)
    os.makedirs(config['depth_folder'], exist_ok=True)
    sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config,
                                    config['specific'], img_name.stem)
    normal_canvas, all_canvas = None, None

    if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
        device = config["gpu_ids"]
    else:
        device = "cpu"

    print(f"running on device {device}")

    for idx in tqdm(range(len(sample_list))):
        depth = None
        sample = sample_list[idx]
        print("Current Source ==> ", sample['src_pair_name'])
        mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] + '.ply')
        image = imageio.imread(sample['ref_img_fi'])

        print(f"Running depth extraction at {time.time()}")
        if config['use_boostmonodepth'] is True:
            run_boostmonodepth(sample['ref_img_fi'], config['src_folder'], config['depth_folder'])
        elif config['require_midas'] is True:
            run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'],
                      config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=640)

        # Derive the output resolution from the predicted depth map, capped by longer_side_len.
        if 'npy' in config['depth_format']:
            config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
        else:
            config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
        frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
        config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
        config['original_h'], config['original_w'] = config['output_h'], config['output_w']
        if image.ndim == 2:
            image = image[..., None].repeat(3, -1)
        if np.sum(np.abs(image[..., 0] - image[..., 1])) == 0 and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0:
            config['gray_image'] = True
        else:
            config['gray_image'] = False
        image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
        depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w'])
        mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]

        if not (config['load_ply'] is True and os.path.exists(mesh_fi)):
            vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config,
                                                                num_iter=config['sparse_iter'], spdb=False)
            depth = vis_depths[-1]
            model = None
            torch.cuda.empty_cache()
            print("Start Running 3D_Photo ...")

            print(f"Loading edge model at {time.time()}")
            depth_edge_model = Inpaint_Edge_Net(init_weights=True)
            depth_edge_weight = torch.load(config['depth_edge_model_ckpt'], map_location=torch.device(device))
            depth_edge_model.load_state_dict(depth_edge_weight)
            depth_edge_model = depth_edge_model.to(device)
            depth_edge_model.eval()

            print(f"Loading depth model at {time.time()}")
            depth_feat_model = Inpaint_Depth_Net()
            depth_feat_weight = torch.load(config['depth_feat_model_ckpt'], map_location=torch.device(device))
            depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
            depth_feat_model = depth_feat_model.to(device)
            depth_feat_model.eval()

            print(f"Loading rgb model at {time.time()}")
            rgb_model = Inpaint_Color_Net()
            rgb_feat_weight = torch.load(config['rgb_feat_model_ckpt'], map_location=torch.device(device))
            rgb_model.load_state_dict(rgb_feat_weight)
            rgb_model.eval()
            rgb_model = rgb_model.to(device)
            graph = None

            print(f"Writing depth ply (and basically doing everything) at {time.time()}")
            # write_ply takes the edge model twice (edge inpainting + edge init), matching the upstream repo.
            rt_info = write_ply(image, depth, sample['int_mtx'], mesh_fi, config,
                                rgb_model, depth_edge_model, depth_edge_model, depth_feat_model)

            if rt_info is False:
                continue
            # Release models so the mesh rendering step has the memory to itself.
            rgb_model = None
            color_feat_model = None
            depth_edge_model = None
            depth_feat_model = None
            torch.cuda.empty_cache()

        if config['save_ply'] is True or config['load_ply'] is True:
            verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi)
        else:
            verts, colors, faces, Height, Width, hFov, vFov = rt_info

        print(f"Making video at {time.time()}")
        videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
        top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
        left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
        down, right = top + config['output_h'], left + config['output_w']
        border = [int(xx) for xx in [top, down, left, right]]
        normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(),
                                                    copy.deepcopy(Height), copy.deepcopy(Width),
                                                    copy.deepcopy(hFov), copy.deepcopy(vFov),
                                                    copy.deepcopy(sample['tgt_pose']), sample['video_postfix'],
                                                    copy.deepcopy(sample['ref_pose']),
                                                    copy.deepcopy(config['video_folder']),
                                                    image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
                                                    videos_poses, video_basename,
                                                    config.get('original_h'), config.get('original_w'),
                                                    border=border, depth=depth, normal_canvas=normal_canvas,
                                                    all_canvas=all_canvas, mean_loc_depth=mean_loc_depth)


def resizer(input_img, max_img_size=512):
    # Downscale so the longer edge is at most max_img_size, preserving aspect ratio.
    width, height = input_img.size
    long_edge = height if height >= width else width
    if long_edge > max_img_size:
        ratio = max_img_size / long_edge
        resized_width = int(ratio * width)
        resized_height = int(ratio * height)
        resized_input_img = input_img.resize((resized_width, resized_height),
                                             resample=Image.BILINEAR)  # was the magic number 2
        return resized_input_img
    else:
        return input_img


def main_app(input_img, num_frames, fps):
    # resize down
    input_img = resizer(input_img)

    # Save image in the folder the inpainting pipeline reads from
    #img_name = Path(str(uuid.uuid4()) + '.jpg')
    img_name = Path('sample.jpg')
    save_folder = Path('image')
    input_img.save(save_folder/img_name)

    inpaint(img_name, num_frames, fps)
    # Get output video path & return
    input_img_path = str(save_folder/img_name)
    out_vid_path = 'video/{0}_circle.mp4'.format(img_name.stem)
    return out_vid_path


# Trajectory names supported by argument.yml; the demo currently returns the 'circle' video.
video_choices = ['dolly-zoom-in', 'zoom-in', 'circle', 'swing']

gradio_inputs = [gr.inputs.Image(type='pil', label='Input Image'),
                 gr.inputs.Slider(minimum=60, maximum=240, step=1, default=120, label="Number of Frames"),
                 gr.inputs.Slider(minimum=10, maximum=40, step=1, default=20, label="Frames per Second (FPS)")]

gradio_outputs = [gr.outputs.Video(label='Output Video')]

examples = [['moon.jpg'], ['dog.jpg']]

description = ("Convert an image into a trajectory-following video. Images are automatically resized down to a "
               "max edge of 512. | NOTE: The current runtime for a sample is around 400-700 seconds; a lower "
               "number of frames can help. Please be patient, as this runs on CPU only, but if this Space gets a "
               "GPU one day, it is already configured for GPU support :) If you have a GPU, feel free to use the "
               "authors' original repo (linked at the bottom of this page; they have a Colab notebook). You can "
               "also run this Space/Gradio app locally!")
# Links shown under the demo; the anchor URLs were stripped from the original HTML and are
# reconstructed here: the paper (arXiv:2004.04727), the authors' project page, and the
# upstream repo from the header comment above.
article = ("<p style='text-align: center'>"
           "<a href='https://arxiv.org/abs/2004.04727' target='_blank'>3D Photography using Context-aware Layered Depth Inpainting</a> | "
           "<a href='https://shihmengli.github.io/3D-Photo-Inpainting/' target='_blank'>Github Project Page</a> | "
           "<a href='https://github.com/vt-vl-lab/3d-photo-inpainting' target='_blank'>Github Repo</a></p>")
" iface = gr.Interface(fn=main_app, inputs=gradio_inputs , outputs=gradio_outputs, examples=examples, title='3D Image Inpainting', description=description, article=article, enable_queue=True) iface.launch(enable_queue=True)