Spaces:

Epoching
/

3D_Photo_Inpainting

Runtime error

File size: 10,668 Bytes

# Repo source: https://github.com/vt-vl-lab/3d-photo-inpainting

#import os
#os.environ['QT_DEBUG_PLUGINS'] = '1'

import subprocess
#subprocess.run('ldd /home/user/.local/lib/python3.8/site-packages/PyQt5/Qt/plugins/platforms/libqxcb.so', shell=True)
#subprocess.run('pip list', shell=True)
subprocess.run('nvidia-smi', shell=True)

from pyvirtualdisplay import Display
display = Display(visible=0, size=(1920, 1080)).start()
#subprocess.run('echo $DISPLAY', shell=True)

# 3d inpainting imports
import numpy as np
import argparse
import glob
import os
from functools import partial
import vispy
import scipy.misc as misc
from tqdm import tqdm
import yaml
import time
import sys
from mesh import write_ply, read_ply, output_3d_photo
from utils import get_MiDaS_samples, read_MiDaS_depth
import torch
import cv2
from skimage.transform import resize
import imageio
import copy
from networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from MiDaS.run import run_depth
from boostmonodepth_utils import run_boostmonodepth
from MiDaS.monodepth_net import MonoDepthNet
import MiDaS.MiDaS_utils as MiDaS_utils
from bilateral_filtering import sparse_bilateral_filtering

import torch

# gradio imports
import gradio as gr
import uuid
from PIL import Image
from pathlib import Path
import shutil
from time import sleep

def inpaint(img_name, num_frames, fps):
    
    config = yaml.load(open('argument.yml', 'r'))
    
    config['num_frames'] = num_frames
    config['fps'] = fps
    
    if torch.cuda.is_available():
        config['gpu_ids'] = 0
    
    if config['offscreen_rendering'] is True:
        vispy.use(app='egl')
    
    os.makedirs(config['mesh_folder'], exist_ok=True)
    os.makedirs(config['video_folder'], exist_ok=True)
    os.makedirs(config['depth_folder'], exist_ok=True)
    sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config, config['specific'], img_name.stem)
    normal_canvas, all_canvas = None, None

    if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
        device = config["gpu_ids"]
    else:
        device = "cpu"

    print(f"running on device {device}")

    for idx in tqdm(range(len(sample_list))):
        depth = None
        sample = sample_list[idx]
        print("Current Source ==> ", sample['src_pair_name'])
        mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] +'.ply')
        image = imageio.imread(sample['ref_img_fi'])

        print(f"Running depth extraction at {time.time()}")
        if config['use_boostmonodepth'] is True:
            run_boostmonodepth(sample['ref_img_fi'], config['src_folder'], config['depth_folder'])
        elif config['require_midas'] is True:
            run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'],
                      config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=640)

        if 'npy' in config['depth_format']:
            config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
        else:
            config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
        frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
        config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
        config['original_h'], config['original_w'] = config['output_h'], config['output_w']
        if image.ndim == 2:
            image = image[..., None].repeat(3, -1)
        if np.sum(np.abs(image[..., 0] - image[..., 1])) == 0 and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0:
            config['gray_image'] = True
        else:
            config['gray_image'] = False
        image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
        depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w'])
        mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]
        if not(config['load_ply'] is True and os.path.exists(mesh_fi)):
            vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config, num_iter=config['sparse_iter'], spdb=False)
            depth = vis_depths[-1]
            model = None
            torch.cuda.empty_cache()
            print("Start Running 3D_Photo ...")
            print(f"Loading edge model at {time.time()}")
            depth_edge_model = Inpaint_Edge_Net(init_weights=True)
            depth_edge_weight = torch.load(config['depth_edge_model_ckpt'],
                                           map_location=torch.device(device))
            depth_edge_model.load_state_dict(depth_edge_weight)
            depth_edge_model = depth_edge_model.to(device)
            depth_edge_model.eval()

            print(f"Loading depth model at {time.time()}")
            depth_feat_model = Inpaint_Depth_Net()
            depth_feat_weight = torch.load(config['depth_feat_model_ckpt'],
                                           map_location=torch.device(device))
            depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
            depth_feat_model = depth_feat_model.to(device)
            depth_feat_model.eval()
            depth_feat_model = depth_feat_model.to(device)
            print(f"Loading rgb model at {time.time()}")
            rgb_model = Inpaint_Color_Net()
            rgb_feat_weight = torch.load(config['rgb_feat_model_ckpt'],
                                         map_location=torch.device(device))
            rgb_model.load_state_dict(rgb_feat_weight)
            rgb_model.eval()
            rgb_model = rgb_model.to(device)
            graph = None


            print(f"Writing depth ply (and basically doing everything) at {time.time()}")
            rt_info = write_ply(image,
                                  depth,
                                  sample['int_mtx'],
                                  mesh_fi,
                                  config,
                                  rgb_model,
                                  depth_edge_model,
                                  depth_edge_model,
                                  depth_feat_model)

            if rt_info is False:
                continue
            rgb_model = None
            color_feat_model = None
            depth_edge_model = None
            depth_feat_model = None
            torch.cuda.empty_cache()
        if config['save_ply'] is True or config['load_ply'] is True:
            verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi)
        else:
            verts, colors, faces, Height, Width, hFov, vFov = rt_info


        print(f"Making video at {time.time()}")
        videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
        top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
        left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
        down, right = top + config['output_h'], left + config['output_w']
        border = [int(xx) for xx in [top, down, left, right]]
        normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov),
                            copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']),
                            image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
                            videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas,
                            mean_loc_depth=mean_loc_depth)

def resizer(input_img, max_img_size=512):
    width, height = input_img.size
    long_edge = height if height >= width else width
    if long_edge > max_img_size:
        ratio = max_img_size / long_edge
        resized_width = int(ratio * width)
        resized_height = int(ratio * height)
        resized_input_img = input_img.resize((resized_width, resized_height), resample=2)
        return resized_input_img 
        
    else:
        return input_img

def main_app(input_img, num_frames, fps):
    
    # resize down
    input_img = resizer(input_img)
    
    # Save image in necessary folder for inpainting
    #img_name = Path(str(uuid.uuid4()) + '.jpg')
    img_name = Path('sample.jpg')
    save_folder = Path('image')
    input_img.save(save_folder/img_name)
    
    inpaint(img_name, num_frames, fps)
    
    #subprocess.run('ls -l', shell=True)
    #subprocess.run('ls image -l', shell=True)
    #subprocess.run('ls video/ -l', shell=True)
    
    # Get output video path & return
    input_img_path = str(save_folder/img_name)
    out_vid_path = 'video/{0}_circle.mp4'.format(img_name.stem)
    
    return out_vid_path

video_choices = ['dolly-zoom-in', 'zoom-in', 'circle', 'swing']
gradio_inputs = [gr.inputs.Image(type='pil', label='Input Image'),
                 gr.inputs.Slider(minimum=60, maximum=240, step=1, default=120, label="Number of Frames"),
                 gr.inputs.Slider(minimum=10, maximum=40, step=1, default=20, label="Frames per Second (FPS)")]
                 
gradio_outputs = [gr.outputs.Video(label='Output Video')]
examples = [ ['moon.jpg'], ['dog.jpg'] ]

description="Convert an image into a trajectory-following video. Images are automatically resized down to a max edge of 512. | NOTE: The current runtime for a sample is around 400-700 seconds. Running on a lower number of frames could help! Do be patient as this is on CPU-only, BUT if this space maybe gets a GPU one day, it's already configured to run with GPU-support :) If you have a GPU, feel free to use the author's original repo (linked at the bottom of this path, they have a collab notebook!) You can also run this space/gradio app locally!"

article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2004.04727' target='_blank'>3D Photography using Context-aware Layered Depth Inpainting</a> | <a href='https://shihmengli.github.io/3D-Photo-Inpainting/' target='_blank'>Github Project Page</a> | <a href='https://github.com/vt-vl-lab/3d-photo-inpainting' target='_blank'>Github Repo</a></p>"

iface = gr.Interface(fn=main_app, inputs=gradio_inputs , outputs=gradio_outputs, examples=examples,
                     title='3D Image Inpainting',
                     description=description,
                     article=article,
                     enable_queue=True)

iface.launch(enable_queue=True)