|
import os
|
|
|
|
import numpy as np
|
|
import torch
|
|
import torchvision.transforms.functional as torchvision_F
|
|
from PIL import Image
|
|
from transparent_background import Remover
|
|
|
|
import spar3d.models.utils as spar3d_utils
|
|
|
|
|
|
def get_device():
    """Select the torch device string to run on.

    Priority: the SPAR3D_USE_CPU env-var opt-out, then CUDA, then Apple
    MPS, and finally plain CPU.
    """
    # An explicit CPU request via the environment beats any accelerator.
    if os.environ.get("SPAR3D_USE_CPU", "0") == "1":
        return "cpu"
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"
|
|
|
|
|
|
def create_intrinsic_from_fov_rad(fov_rad: float, cond_height: int, cond_width: int):
    """Build the camera intrinsic matrix for a field of view given in radians.

    Returns:
        (intrinsic, intrinsic_normed_cond): the pixel-space intrinsic from
        `spar3d_utils.get_intrinsic_from_fov`, plus a clone whose focal
        lengths and principal point are divided by the image size.
    """
    intrinsic = spar3d_utils.get_intrinsic_from_fov(
        fov_rad, H=cond_height, W=cond_width
    )
    intrinsic_normed_cond = intrinsic.clone()
    # x-related entries (fx, cx) are normalized by width,
    # y-related entries (fy, cy) by height.
    intrinsic_normed_cond[..., 0, 0] /= cond_width
    intrinsic_normed_cond[..., 0, 2] /= cond_width
    intrinsic_normed_cond[..., 1, 1] /= cond_height
    intrinsic_normed_cond[..., 1, 2] /= cond_height
    return intrinsic, intrinsic_normed_cond
|
|
|
|
|
|
def create_intrinsic_from_fov_deg(fov_deg: float, cond_height: int, cond_width: int):
    """Degree-based convenience wrapper around `create_intrinsic_from_fov_rad`."""
    fov_rad = np.deg2rad(fov_deg)
    return create_intrinsic_from_fov_rad(fov_rad, cond_height, cond_width)
|
|
|
|
|
|
def default_cond_c2w(distance: float):
    """Return the default 4x4 camera-to-world pose for the conditioning view.

    The translation column places the camera `distance` units along the
    world x-axis; the rotation part is a fixed axis permutation.
    """
    pose_rows = [
        [0.0, 0.0, 1.0, distance],
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
    ]
    return torch.as_tensor(pose_rows).float()
|
|
|
|
|
|
def normalize_pc_bbox(pc, scale=1.0):
    """Center each point cloud on its bounding box and normalize its extent.

    Args:
        pc: (N, C) or (B, N, C) tensor with C in {3, 6, 9}; the first three
            channels are xyz, any remaining channels (e.g. normals/colors)
            are passed through unchanged.
        scale: multiplier applied after dividing by the bbox's largest side,
            so the result spans roughly [-scale/2, scale/2] per axis.

    Returns:
        A tensor of the same shape as `pc`, on `pc`'s original device.
    """
    assert len(pc.shape) in [2, 3] and pc.shape[-1] in [3, 6, 9]
    n_dim = len(pc.shape)
    device = pc.device
    pc = pc.cpu()
    # Treat the unbatched case as a batch of one for a single code path.
    if n_dim == 2:
        pc = pc.unsqueeze(0)
    normalize_pc = []
    for b in range(pc.shape[0]):
        xyz = pc[b, :, :3]
        bound_x = (xyz[:, 0].max(), xyz[:, 0].min())
        bound_y = (xyz[:, 1].max(), xyz[:, 1].min())
        bound_z = (xyz[:, 2].max(), xyz[:, 2].min())

        center = np.array(
            [
                (bound_x[0] + bound_x[1]) / 2,
                (bound_y[0] + bound_y[1]) / 2,
                (bound_z[0] + bound_z[1]) / 2,
            ]
        )

        # BUG FIX: the original reassigned `scale` here, silently discarding
        # the caller-supplied parameter. Use a distinct name for the bbox
        # extent and apply `scale` on top (the default 1.0 reproduces the
        # old behavior exactly).
        extent = max(
            bound_x[0] - bound_x[1], bound_y[0] - bound_y[1], bound_z[0] - bound_z[1]
        )
        xyz = (xyz - center) / extent * scale
        extra = pc[b, :, 3:]
        normalize_pc.append(torch.cat([xyz, extra], dim=-1))
    return (
        torch.stack(normalize_pc, dim=0).to(device)
        if n_dim == 3
        else normalize_pc[0].to(device)
    )
|
|
|
|
|
|
def remove_background(
    image: Image.Image,
    bg_remover: Remover = None,
    force: bool = False,
    **transparent_background_kwargs,
) -> Image.Image:
    """Remove the background of `image` unless it already has transparency.

    An RGBA image whose alpha channel contains at least one value below 255
    is treated as already matted and returned unchanged, unless `force` is
    True.

    Args:
        image: input PIL image. (FIX: annotated as `Image.Image`; the
            original annotated with the PIL `Image` module itself.)
        bg_remover: `transparent_background.Remover` instance; must be
            provided whenever removal actually runs.
        force: run the remover even if the image already has transparency.
        **transparent_background_kwargs: forwarded to `Remover.process`.

    Returns:
        The (possibly background-removed) PIL image.
    """
    do_remove = True
    # getextrema()[3] is the (min, max) of the alpha band; a min below 255
    # means some transparency is already present.
    if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
        do_remove = False
    do_remove = do_remove or force
    if do_remove:
        image = bg_remover.process(
            image.convert("RGB"), **transparent_background_kwargs
        )
    return image
|
|
|
|
|
|
def get_1d_bounds(arr):
    """Return the first and last indices of the nonzero entries of `arr`.

    Raises IndexError if the array has no nonzero entries.
    """
    nonzero_idx = np.flatnonzero(arr)
    first, last = nonzero_idx[0], nonzero_idx[-1]
    return first, last
|
|
|
|
|
|
def get_bbox_from_mask(mask, thr=0.5):
    """Compute the tight bounding box of a (H, W) mask thresholded at `thr`.

    Args:
        mask: 2-D array of scores/values.
        thr: threshold; pixels with value > thr count as foreground.

    Returns:
        (x0, y0, x1, y1): inclusive bounds, x indexing columns and y rows.

    Raises:
        AssertionError: if no pixel exceeds `thr`.
    """
    masks_for_box = (mask > thr).astype(np.float32)
    # FIX: raise explicitly instead of `assert` so the check is not
    # stripped under `python -O`; the exception type is unchanged.
    if masks_for_box.sum() <= 0:
        raise AssertionError("Empty mask!")
    # Summing over rows gives per-column occupancy (x-bounds); summing
    # over columns gives per-row occupancy (y-bounds).
    x0, x1 = get_1d_bounds(masks_for_box.sum(axis=-2))
    y0, y1 = get_1d_bounds(masks_for_box.sum(axis=-1))
    return x0, y0, x1, y1
|
|
|
|
|
|
def foreground_crop(image_rgba, crop_ratio=1.3, newsize=None, no_crop=False):
    """Crop an RGBA image to a square window around its foreground.

    Foreground pixels are those with a nonzero alpha value. The crop is a
    square centered on the foreground bounding box, enlarged by
    `crop_ratio`.

    Args:
        image_rgba: PIL image, must be in RGBA mode.
        crop_ratio: padding factor applied to the larger bbox side.
        newsize: optional size passed to `Image.resize` afterwards.
        no_crop: skip the crop entirely (only the optional resize runs).

    Returns:
        The cropped (and optionally resized) PIL image.
    """
    assert image_rgba.mode == "RGBA", "Image must be in RGBA mode!"
    if no_crop:
        image = image_rgba
    else:
        # Foreground mask: any pixel whose alpha is at least 1.
        alpha = np.array(image_rgba)[:, :, -1]
        fg_mask = (alpha >= 1).astype(np.float32)
        x1, y1, x2, y2 = get_bbox_from_mask(fg_mask, thr=0.5)
        box_h, box_w = y2 - y1, x2 - x1
        center_y, center_x = (y1 + y2) / 2, (x1 + x2) / 2
        # Side of the square window, enlarged around the bbox center.
        side = max(box_h, box_w) * crop_ratio
        image = torchvision_F.crop(
            image_rgba,
            top=int(center_y - side / 2),
            left=int(center_x - side / 2),
            height=int(side),
            width=int(side),
        )

    if newsize is not None:
        image = image.resize(newsize)
    return image
|
|
|