Spaces:

Epoching
/

3D_Photo_Inpainting

Runtime error

App Files Files Community

3D_Photo_Inpainting / app.py

Epoching

Update app.py

1c06d85 almost 3 years ago

raw

history blame

10.7 kB

	# Repo source: https://github.com/vt-vl-lab/3d-photo-inpainting

	#import os
	#os.environ['QT_DEBUG_PLUGINS'] = '1'

	import subprocess
	#subprocess.run('ldd /home/user/.local/lib/python3.8/site-packages/PyQt5/Qt/plugins/platforms/libqxcb.so', shell=True)
	#subprocess.run('pip list', shell=True)
	# Startup diagnostic: run `nvidia-smi` through the shell. The result is not
	# checked (no check=True), so a failing command does not abort startup.
	# Presumably here to show in the logs whether a GPU is attached.
	subprocess.run('nvidia-smi', shell=True)

	from pyvirtualdisplay import Display
	# Start a non-visible 1920x1080 virtual display and keep the handle at module
	# level for the lifetime of the process.
	# NOTE(review): presumably required by the vispy/Qt rendering used later on a
	# headless host -- confirm.
	display = Display(visible=0, size=(1920, 1080)).start()
	#subprocess.run('echo $DISPLAY', shell=True)

	# 3d inpainting imports
	import numpy as np
	import argparse
	import glob
	import os
	from functools import partial
	import vispy
	import scipy.misc as misc
	from tqdm import tqdm
	import yaml
	import time
	import sys
	from mesh import write_ply, read_ply, output_3d_photo
	from utils import get_MiDaS_samples, read_MiDaS_depth
	import torch
	import cv2
	from skimage.transform import resize
	import imageio
	import copy
	from networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
	from MiDaS.run import run_depth
	from boostmonodepth_utils import run_boostmonodepth
	from MiDaS.monodepth_net import MonoDepthNet
	import MiDaS.MiDaS_utils as MiDaS_utils
	from bilateral_filtering import sparse_bilateral_filtering

	import torch

	# gradio imports
	import gradio as gr
	import uuid
	from PIL import Image
	from pathlib import Path
	import shutil
	from time import sleep

	def _load_eval_model(model, ckpt_path, device):
	    """Load checkpoint weights into ``model``, move it to ``device``, set eval mode."""
	    weights = torch.load(ckpt_path, map_location=torch.device(device))
	    model.load_state_dict(weights)
	    model = model.to(device)
	    model.eval()
	    return model


	def inpaint(img_name, num_frames, fps):
	    """Run the 3D photo inpainting pipeline for one image and write its video(s).

	    Settings are read from ``argument.yml``; ``num_frames`` and ``fps`` from
	    the caller override the values in that file. GPU 0 is selected when CUDA
	    is available, otherwise whatever ``gpu_ids`` the config holds decides
	    between a device index and the CPU.

	    Args:
	        img_name: Object exposing ``.stem`` (``main_app`` passes a
	            ``pathlib.Path``); the stem is handed to ``get_MiDaS_samples``.
	        num_frames: Stored as ``config['num_frames']``.
	        fps: Stored as ``config['fps']``.

	    Returns:
	        None. Results are written by ``write_ply`` / ``output_3d_photo``.
	    """
	    # safe_load: yaml.load() without an explicit Loader raises TypeError on
	    # PyYAML >= 6; the context manager also closes the file handle.
	    with open('argument.yml', 'r') as config_file:
	        config = yaml.safe_load(config_file)

	    config['num_frames'] = num_frames
	    config['fps'] = fps

	    if torch.cuda.is_available():
	        config['gpu_ids'] = 0

	    if config['offscreen_rendering'] is True:
	        vispy.use(app='egl')

	    os.makedirs(config['mesh_folder'], exist_ok=True)
	    os.makedirs(config['video_folder'], exist_ok=True)
	    os.makedirs(config['depth_folder'], exist_ok=True)
	    sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config, config['specific'], img_name.stem)
	    # Carried across iterations: each output_3d_photo call receives the
	    # canvases returned by the previous one.
	    normal_canvas, all_canvas = None, None

	    if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
	        device = config["gpu_ids"]
	    else:
	        device = "cpu"

	    print(f"running on device {device}")

	    for sample in tqdm(sample_list):
	        print("Current Source ==> ", sample['src_pair_name'])
	        mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] +'.ply')
	        image = imageio.imread(sample['ref_img_fi'])

	        print(f"Running depth extraction at {time.time()}")
	        if config['use_boostmonodepth'] is True:
	            run_boostmonodepth(sample['ref_img_fi'], config['src_folder'], config['depth_folder'])
	        elif config['require_midas'] is True:
	            run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'],
	                      config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=640)

	        # Output size comes from the depth map, rescaled so the longer side
	        # equals config['longer_side_len'].
	        if 'npy' in config['depth_format']:
	            config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
	        else:
	            config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
	        frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
	        config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
	        config['original_h'], config['original_w'] = config['output_h'], config['output_w']

	        # Single-channel input is expanded to three identical channels.
	        if image.ndim == 2:
	            image = image[..., None].repeat(3, -1)
	        # Flag images whose first three channels are all identical.
	        if np.sum(np.abs(image[..., 0] - image[..., 1])) == 0 and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0:
	            config['gray_image'] = True
	        else:
	            config['gray_image'] = False

	        image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
	        depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w'])
	        # Depth value at the centre pixel, taken before bilateral filtering.
	        mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]

	        # Build the mesh unless a previously saved one is to be reused.
	        if not(config['load_ply'] is True and os.path.exists(mesh_fi)):
	            vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config, num_iter=config['sparse_iter'], spdb=False)
	            depth = vis_depths[-1]
	            torch.cuda.empty_cache()
	            print("Start Running 3D_Photo ...")

	            print(f"Loading edge model at {time.time()}")
	            depth_edge_model = _load_eval_model(
	                Inpaint_Edge_Net(init_weights=True),
	                config['depth_edge_model_ckpt'],
	                device,
	            )

	            print(f"Loading depth model at {time.time()}")
	            depth_feat_model = _load_eval_model(
	                Inpaint_Depth_Net(),
	                config['depth_feat_model_ckpt'],
	                device,
	            )

	            print(f"Loading rgb model at {time.time()}")
	            rgb_model = _load_eval_model(
	                Inpaint_Color_Net(),
	                config['rgb_feat_model_ckpt'],
	                device,
	            )

	            print(f"Writing depth ply (and basically doing everything) at {time.time()}")
	            # NOTE(review): depth_edge_model is passed twice, exactly as in
	            # the original call -- confirm against write_ply's signature.
	            rt_info = write_ply(image,
	                                depth,
	                                sample['int_mtx'],
	                                mesh_fi,
	                                config,
	                                rgb_model,
	                                depth_edge_model,
	                                depth_edge_model,
	                                depth_feat_model)

	            if rt_info is False:
	                continue
	            # Drop the model references before emptying the CUDA cache.
	            rgb_model = None
	            depth_edge_model = None
	            depth_feat_model = None
	            torch.cuda.empty_cache()

	        if config['save_ply'] is True or config['load_ply'] is True:
	            verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi)
	        else:
	            verts, colors, faces, Height, Width, hFov, vFov = rt_info

	        print(f"Making video at {time.time()}")
	        videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
	        top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
	        left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
	        down, right = top + config['output_h'], left + config['output_w']
	        border = [int(xx) for xx in [top, down, left, right]]
	        normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov),
	                            copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']),
	                            image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
	                            videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas,
	                            mean_loc_depth=mean_loc_depth)

	def resizer(input_img, max_img_size=512):
	    """Downscale ``input_img`` so its longer edge is at most ``max_img_size``.

	    Args:
	        input_img: Image-like object exposing ``.size`` as ``(width, height)``
	            and ``.resize(size, resample=...)`` (a PIL image in this app).
	        max_img_size: Largest allowed edge length in pixels.

	    Returns:
	        ``input_img`` itself when it already fits, otherwise the object
	        returned by ``input_img.resize``; the aspect ratio is kept up to
	        integer truncation.
	    """
	    width, height = input_img.size
	    long_edge = max(width, height)
	    if long_edge <= max_img_size:
	        return input_img

	    ratio = max_img_size / long_edge
	    # Clamp to 1 px: for extreme aspect ratios int() truncates the short side
	    # to 0, which is not a valid resize target.
	    resized_width = max(1, int(ratio * width))
	    resized_height = max(1, int(ratio * height))
	    # resample=2 is kept verbatim from the original call.
	    return input_img.resize((resized_width, resized_height), resample=2)

	def main_app(input_img, num_frames, fps):
	    """Gradio callback: save the uploaded image, run inpainting, return the video path.

	    Args:
	        input_img: Uploaded image; passed through ``resizer`` and then saved
	            with ``.save`` (a PIL image, given the ``type='pil'`` input).
	        num_frames: Forwarded unchanged to ``inpaint``.
	        fps: Forwarded unchanged to ``inpaint``.

	    Returns:
	        The string ``'video/sample_circle.mp4'``.
	    """
	    # resize down
	    input_img = resizer(input_img)

	    # Save image in necessary folder for inpainting.
	    # NOTE: the file name is fixed, so every request overwrites the same
	    # input file.
	    img_name = Path('sample.jpg')
	    save_folder = Path('image')
	    # Create the folder if needed so the save cannot fail on a missing directory.
	    save_folder.mkdir(parents=True, exist_ok=True)
	    input_img.save(save_folder / img_name)

	    inpaint(img_name, num_frames, fps)

	    # Get output video path & return.
	    # NOTE(review): hard-codes the 'video' folder and the '_circle' suffix;
	    # presumably these match argument.yml -- confirm.
	    return 'video/{0}_circle.mp4'.format(img_name.stem)

	# Gradio UI wiring: one image plus two sliders in, one video out.
	# NOTE: video_choices is not referenced by any other statement in this section.
	video_choices = ['dolly-zoom-in', 'zoom-in', 'circle', 'swing']
	gradio_inputs = [
	    gr.inputs.Image(type='pil', label='Input Image'),
	    gr.inputs.Slider(minimum=60, maximum=240, step=1, default=120, label="Number of Frames"),
	    gr.inputs.Slider(minimum=10, maximum=40, step=1, default=20, label="Frames per Second (FPS)"),
	]

	gradio_outputs = [gr.outputs.Video(label='Output Video')]
	examples = [['moon.jpg'], ['dog.jpg']]

	# The separators below are plain '|': the previous '\|' is not a valid Python
	# escape sequence and left a literal backslash in the string.
	description = "Convert an image into a trajectory-following video. Images are automatically resized down to a max edge of 512. | NOTE: The current runtime for a sample is around 400-700 seconds. Running on a lower number of frames could help! Do be patient as this is on CPU-only, BUT if this space maybe gets a GPU one day, it's already configured to run with GPU-support :) If you have a GPU, feel free to use the author's original repo (linked at the bottom of this path, they have a collab notebook!) You can also run this space/gradio app locally!"

	article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2004.04727' target='_blank'>3D Photography using Context-aware Layered Depth Inpainting</a> | <a href='https://shihmengli.github.io/3D-Photo-Inpainting/' target='_blank'>Github Project Page</a> | <a href='https://github.com/vt-vl-lab/3d-photo-inpainting' target='_blank'>Github Repo</a></p>"

	iface = gr.Interface(
	    fn=main_app,
	    inputs=gradio_inputs,
	    outputs=gradio_outputs,
	    examples=examples,
	    title='3D Image Inpainting',
	    description=description,
	    article=article,
	    enable_queue=True,
	)

	# Starts the app at import time; queueing is requested here as well as on
	# the Interface above.
	iface.launch(enable_queue=True)