ThunderVVV committed · commit 981eadf
1 parent: 193a8b0

update

Files changed:
- app.py +50 -20
- lib/vis/renderer_world.py +357 -0
- pre-requirements.txt +2 -1
- scripts/scripts_test_video/hawor_video.py +6 -0
app.py
CHANGED
@@ -49,13 +49,17 @@ os.environ["FORCE_CUDA"] = "1"
 os.system('pip install git+https://github.com/facebookresearch/pytorch3d.git@stable')
 
 import numpy as np
+import joblib
+import cv2
+import imageio
 from easydict import EasyDict
 from scripts.scripts_test_video.detect_track_video import detect_track_video
 from scripts.scripts_test_video.hawor_video import hawor_motion_estimation, hawor_infiller
 from scripts.scripts_test_video.hawor_slam import hawor_slam
 from hawor.utils.process import get_mano_faces, run_mano, run_mano_left
 from lib.eval_utils.custom_utils import load_slam_cam
-from lib.vis.run_vis2 import run_vis2_on_video, run_vis2_on_video_cam
+from lib.vis.run_vis2 import lookat_matrix, run_vis2_on_video, run_vis2_on_video_cam
+from lib.vis.renderer_world import Renderer
 
 @spaces.GPU(duration=300)
 def render_reconstruction(input_video, img_focal):
@@ -71,8 +75,9 @@ def render_reconstruction(input_video, img_focal):
 
     frame_chunks_all, img_focal = hawor_motion_estimation(args, start_idx, end_idx, seq_folder)
 
-    hawor_slam(args, start_idx, end_idx)
     slam_path = os.path.join(seq_folder, f"SLAM/hawor_slam_w_scale_{start_idx}_{end_idx}.npz")
+    if not os.path.exists(slam_path):
+        hawor_slam(args, start_idx, end_idx)
     R_w2c_sla_all, t_w2c_sla_all, R_c2w_sla_all, t_c2w_sla_all = load_slam_cam(slam_path)
 
     pred_trans, pred_rot, pred_hand_pose, pred_betas, pred_valid = hawor_infiller(args, start_idx, end_idx, frame_chunks_all)
@@ -134,24 +139,49 @@ def render_reconstruction(input_video, img_focal):
     left_dict['vertices'] = torch.einsum('ij,btnj->btni', R_x, left_dict['vertices'].cpu())
     right_dict['vertices'] = torch.einsum('ij,btnj->btni', R_x, right_dict['vertices'].cpu())
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # simple visualization
+    bin_size = 128
+    max_faces_per_bin = 20000
+    img = cv2.imread(imgfiles[0])
+    renderer = Renderer(img.shape[1], img.shape[0], 1800, 'cuda',
+                        bin_size=bin_size, max_faces_per_bin=max_faces_per_bin)
+
+    output_pth = os.path.join(seq_folder, f"vis_{vis_start}_{vis_end}")
+    if not os.path.exists(output_pth):
+        os.makedirs(output_pth)
+    image_names = imgfiles[vis_start:vis_end]
+    print(f"vis {vis_start} to {vis_end}")
+    # vis_video_path = run_vis2_on_video(left_dict, right_dict, output_pth, img_focal, image_names, R_c2w=R_c2w_sla_all[vis_start:vis_end], t_c2w=t_c2w_sla_all[vis_start:vis_end], interactive=False)
+    faces_left = torch.from_numpy(faces_left).cuda()
+    faces_right = torch.from_numpy(faces_right).cuda()
+    faces_all = torch.stack((faces_left, faces_right))
+
+    side_source = torch.tensor([0.463, -0.478, 2.456])
+    side_target = torch.tensor([0.026, -0.481, -3.184])
+    up = torch.tensor([1.0, 0.0, 0.0])
+    view_camera = lookat_matrix(side_source, side_target, up)
+    cam_R = view_camera[:3, :3].unsqueeze(0).cuda()
+    cam_T = view_camera[:3, 3].unsqueeze(0).cuda()
+    vis_video_imgs = []
+    writer = imageio.get_writer(f'{seq_folder}/vis_output.mp4', fps=30, mode='I',
+                                format='FFMPEG', macro_block_size=1)
+    renderer.set_ground(100, 0, 0)
+    for img_i, _ in enumerate(image_names):
+
+        vertices_left = left_dict['vertices'][:, img_i]
+        vertices_right = right_dict['vertices'][:, img_i]
+
+        cameras, lights = renderer.create_camera_from_cv(cam_R, cam_T)
+        verts_color = torch.tensor([0.207, 0.596, 0.792, 1.0]).unsqueeze(0).repeat(2, 1)
+        vertices_i = torch.stack((vertices_left, vertices_right))
+        rend, _ = renderer.render_multiple(vertices_i.cuda(), faces_all.cuda(), verts_color.cuda(), cameras, lights)
+
+        writer.append_data(rend)
+
+    writer.close()
+    print("finish")
+
+    return f'{seq_folder}/vis_output.mp4'
 
 
 
lib/vis/renderer_world.py
ADDED
@@ -0,0 +1,357 @@
# Useful rendering functions from WHAM (some modification)

import cv2
import torch
import numpy as np

from pytorch3d.renderer import (
    PerspectiveCameras,
    TexturesVertex,
    PointLights,
    Materials,
    RasterizationSettings,
    MeshRenderer,
    MeshRasterizer,
    SoftPhongShader,
)
from pytorch3d.structures import Meshes
from pytorch3d.structures.meshes import join_meshes_as_scene
from pytorch3d.renderer.cameras import look_at_rotation
from pytorch3d.renderer.camera_conversions import _cameras_from_opencv_projection

from lib.vis.wham_tools.tools import checkerboard_geometry

def overlay_image_onto_background(image, mask, bbox, background):
    if isinstance(image, torch.Tensor):
        image = image.detach().cpu().numpy()
    if isinstance(mask, torch.Tensor):
        mask = mask.detach().cpu().numpy()

    out_image = background.copy()
    bbox = bbox[0].int().cpu().numpy().copy()
    roi_image = out_image[bbox[1]:bbox[3], bbox[0]:bbox[2]]

    roi_image[mask] = image[mask]
    out_image[bbox[1]:bbox[3], bbox[0]:bbox[2]] = roi_image

    return out_image


def update_intrinsics_from_bbox(K_org, bbox):
    device, dtype = K_org.device, K_org.dtype

    K = torch.zeros((K_org.shape[0], 4, 4)
    ).to(device=device, dtype=dtype)
    K[:, :3, :3] = K_org.clone()
    K[:, 2, 2] = 0
    K[:, 2, -1] = 1
    K[:, -1, 2] = 1

    image_sizes = []
    for idx, bbox in enumerate(bbox):
        left, upper, right, lower = bbox
        cx, cy = K[idx, 0, 2], K[idx, 1, 2]

        new_cx = cx - left
        new_cy = cy - upper
        new_height = max(lower - upper, 1)
        new_width = max(right - left, 1)
        new_cx = new_width - new_cx
        new_cy = new_height - new_cy

        K[idx, 0, 2] = new_cx
        K[idx, 1, 2] = new_cy
        image_sizes.append((int(new_height), int(new_width)))

    return K, image_sizes


def perspective_projection(x3d, K, R=None, T=None):
    if R != None:
        x3d = torch.matmul(R, x3d.transpose(1, 2)).transpose(1, 2)
    if T != None:
        x3d = x3d + T.transpose(1, 2)

    x2d = torch.div(x3d, x3d[..., 2:])
    x2d = torch.matmul(K, x2d.transpose(-1, -2)).transpose(-1, -2)[..., :2]
    return x2d


def compute_bbox_from_points(X, img_w, img_h, scaleFactor=1.2):
    left = torch.clamp(X.min(1)[0][:, 0], min=0, max=img_w)
    right = torch.clamp(X.max(1)[0][:, 0], min=0, max=img_w)
    top = torch.clamp(X.min(1)[0][:, 1], min=0, max=img_h)
    bottom = torch.clamp(X.max(1)[0][:, 1], min=0, max=img_h)

    cx = (left + right) / 2
    cy = (top + bottom) / 2
    width = (right - left)
    height = (bottom - top)

    new_left = torch.clamp(cx - width/2 * scaleFactor, min=0, max=img_w-1)
    new_right = torch.clamp(cx + width/2 * scaleFactor, min=1, max=img_w)
    new_top = torch.clamp(cy - height / 2 * scaleFactor, min=0, max=img_h-1)
    new_bottom = torch.clamp(cy + height / 2 * scaleFactor, min=1, max=img_h)

    bbox = torch.stack((new_left.detach(), new_top.detach(),
                        new_right.detach(), new_bottom.detach())).int().float().T

    return bbox


class Renderer():
    def __init__(self, width, height, focal_length, device,
                 bin_size=None, max_faces_per_bin=None):

        self.width = width
        self.height = height
        self.focal_length = focal_length

        self.device = device

        self.initialize_camera_params()
        self.lights = PointLights(device=device, location=[[0.0, 0.0, -10.0]])
        self.create_renderer(bin_size, max_faces_per_bin)

    def create_renderer(self, bin_size, max_faces_per_bin):
        self.renderer = MeshRenderer(
            rasterizer=MeshRasterizer(
                raster_settings=RasterizationSettings(
                    image_size=self.image_sizes[0],
                    blur_radius=1e-5, bin_size=bin_size,
                    max_faces_per_bin=max_faces_per_bin),
            ),
            shader=SoftPhongShader(
                device=self.device,
                lights=self.lights,
            )
        )

    def initialize_camera_params(self):
        """Hard coding for camera parameters
        TODO: Do some soft coding"""

        # Extrinsics
        self.R = torch.diag(
            torch.tensor([1, 1, 1])
        ).float().to(self.device).unsqueeze(0)

        self.T = torch.tensor(
            [0, 0, 0]
        ).unsqueeze(0).float().to(self.device)

        # Intrinsics
        self.K = torch.tensor(
            [[self.focal_length, 0, self.width/2],
             [0, self.focal_length, self.height/2],
             [0, 0, 1]]
        ).unsqueeze(0).float().to(self.device)
        self.bboxes = torch.tensor([[0, 0, self.width, self.height]]).float()
        self.K_full, self.image_sizes = update_intrinsics_from_bbox(self.K, self.bboxes)

        # self.K_full = self.K # test
        self.cameras = self.create_camera()

    def create_camera(self, R=None, T=None):
        if R is not None:
            self.R = R.clone().view(1, 3, 3).to(self.device)
        if T is not None:
            self.T = T.clone().view(1, 3).to(self.device)

        return PerspectiveCameras(
            device=self.device,
            R=self.R, #.mT,
            T=self.T,
            K=self.K_full,
            image_size=self.image_sizes,
            in_ndc=False)

    def create_camera_from_cv(self, R, T, K=None, image_size=None):
        # R: [1, 3, 3] Tensor
        # T: [1, 3] Tensor
        # K: [1, 3, 3] Tensor
        # image_size: [1, 2] Tensor in HW
        if K is None:
            K = self.K

        if image_size is None:
            image_size = torch.tensor(self.image_sizes)

        cameras = _cameras_from_opencv_projection(R, T, K, image_size)
        lights = PointLights(device=K.device, location=T)

        return cameras, lights

    def set_ground(self, length, center_x, center_z):
        device = self.device
        v, f, vc, fc = map(torch.from_numpy, checkerboard_geometry(length=length, tile_width=1.0, c1=center_x, c2=center_z, up="z"))
        v[:, 2] -= 2 # z plane
        v, f, vc = v.to(device), f.to(device), vc.to(device)
        self.ground_geometry = [v, f, vc]


    def update_bbox(self, x3d, scale=2.0, mask=None):
        """ Update bbox of cameras from the given 3d points

        x3d: input 3D keypoints (or vertices), (num_frames, num_points, 3)
        """

        if x3d.size(-1) != 3:
            x2d = x3d.unsqueeze(0)
        else:
            x2d = perspective_projection(x3d.unsqueeze(0), self.K, self.R, self.T.reshape(1, 3, 1))

        if mask is not None:
            x2d = x2d[:, ~mask]

        bbox = compute_bbox_from_points(x2d, self.width, self.height, scale)
        self.bboxes = bbox

        self.K_full, self.image_sizes = update_intrinsics_from_bbox(self.K, bbox)
        self.cameras = self.create_camera()
        self.create_renderer()

    def reset_bbox(self,):
        bbox = torch.zeros((1, 4)).float().to(self.device)
        bbox[0, 2] = self.width
        bbox[0, 3] = self.height
        self.bboxes = bbox

        self.K_full, self.image_sizes = update_intrinsics_from_bbox(self.K, bbox)
        self.cameras = self.create_camera()
        self.create_renderer()

    def render_mesh(self, vertices, background, colors=[0.8, 0.8, 0.8]):
        self.update_bbox(vertices[::50], scale=1.2)
        vertices = vertices.unsqueeze(0)

        if colors[0] > 1: colors = [c / 255. for c in colors]
        verts_features = torch.tensor(colors).reshape(1, 1, 3).to(device=vertices.device, dtype=vertices.dtype)
        verts_features = verts_features.repeat(1, vertices.shape[1], 1)
        textures = TexturesVertex(verts_features=verts_features)

        mesh = Meshes(verts=vertices,
                      faces=self.faces,
                      textures=textures,)

        materials = Materials(
            device=self.device,
            specular_color=(colors, ),
            shininess=0
        )

        results = torch.flip(
            self.renderer(mesh, materials=materials, cameras=self.cameras, lights=self.lights),
            [1, 2]
        )
        image = results[0, ..., :3] * 255
        mask = results[0, ..., -1] > 1e-3

        image = overlay_image_onto_background(image, mask, self.bboxes, background.copy())
        self.reset_bbox()
        return image


    def render_with_ground(self, verts, faces, colors, cameras, lights):
        """
        :param verts (B, V, 3)
        :param faces (F, 3)
        :param colors (B, 3)
        """

        # (B, V, 3), (B, F, 3), (B, V, 3)
        verts, faces, colors = prep_shared_geometry(verts, faces, colors)
        # (V, 3), (F, 3), (V, 3)
        gv, gf, gc = self.ground_geometry
        verts = list(torch.unbind(verts, dim=0)) + [gv]
        faces = list(torch.unbind(faces, dim=0)) + [gf]
        colors = list(torch.unbind(colors, dim=0)) + [gc[..., :3]]
        mesh = create_meshes(verts, faces, colors)

        materials = Materials(
            device=self.device,
            shininess=0
        )

        results = self.renderer(mesh, cameras=cameras, lights=lights, materials=materials)
        image = (results[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)

        return image

    def render_multiple(self, verts_list, faces_list, colors_list, cameras, lights):
        """
        :param verts (B, V, 3)
        :param faces (F, 3)
        :param colors (B, 3)
        """
        # (B, V, 3), (B, F, 3), (B, V, 3)
        verts_, faces_, colors_ = [], [], []
        for i, verts in enumerate(verts_list):
            colors = colors_list[[i]]
            faces = faces_list[i]
            verts_i, faces_i, colors_i = prep_shared_geometry(verts, faces, colors)
            if i == 0:
                verts_ = list(torch.unbind(verts_i, dim=0))
                faces_ = list(torch.unbind(faces_i, dim=0))
                colors_ = list(torch.unbind(colors_i, dim=0))
            else:
                verts_ += list(torch.unbind(verts_i, dim=0))
                faces_ += list(torch.unbind(faces_i, dim=0))
                colors_ += list(torch.unbind(colors_i, dim=0))

        # (V, 3), (F, 3), (V, 3)
        gv, gf, gc = self.ground_geometry
        verts_ += [gv]
        faces_ += [gf]
        colors_ += [gc[..., :3]]
        mesh = create_meshes(verts_, faces_, colors_)

        materials = Materials(
            device=self.device,
            shininess=0
        )
        results = self.renderer(mesh, cameras=cameras, lights=lights, materials=materials)
        image = (results[0, ..., :3].cpu().numpy() * 255).astype(np.uint8)
        mask = results[0, ..., -1].cpu().numpy() > 0
        return image, mask


def prep_shared_geometry(verts, faces, colors):
    """
    :param verts (B, V, 3)
    :param faces (F, 3)
    :param colors (B, 4)
    """
    B, V, _ = verts.shape
    F, _ = faces.shape
    colors = colors.unsqueeze(1).expand(B, V, -1)[..., :3]
    faces = faces.unsqueeze(0).expand(B, F, -1)
    return verts, faces, colors


def create_meshes(verts, faces, colors):
    """
    :param verts (B, V, 3)
    :param faces (B, F, 3)
    :param colors (B, V, 3)
    """
    textures = TexturesVertex(verts_features=colors)
    meshes = Meshes(verts=verts, faces=faces, textures=textures)
    return join_meshes_as_scene(meshes)


def get_global_cameras(verts, device, distance=5, position=(-5.0, 5.0, 0.0)):
    positions = torch.tensor([position]).repeat(len(verts), 1)
    targets = verts.mean(1)

    directions = targets - positions
    directions = directions / torch.norm(directions, dim=-1).unsqueeze(-1) * distance
    positions = targets - directions

    rotation = look_at_rotation(positions, targets, ).mT
    translation = -(rotation @ positions.unsqueeze(-1)).squeeze(-1)

    lights = PointLights(device=device, location=[position])
    return rotation, translation, lights
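For orientation, here is a minimal usage sketch of this Renderer, following the call pattern introduced in app.py above. The frame size, focal length, camera pose, and mesh shapes below are illustrative assumptions for this sketch, not values from the commit:

# Hypothetical driver for lib/vis/renderer_world.Renderer (shapes and values are assumptions).
import torch
from lib.vis.renderer_world import Renderer

width, height, focal_length = 1280, 720, 1800      # assumed frame size and focal length
renderer = Renderer(width, height, focal_length, 'cuda',
                    bin_size=128, max_faces_per_bin=20000)
renderer.set_ground(100, 0, 0)                      # checkerboard ground plane, as in app.py

# OpenCV-convention extrinsics: R is (1, 3, 3), T is (1, 3)
cam_R = torch.eye(3).unsqueeze(0).cuda()
cam_T = torch.tensor([[0.0, 0.0, 3.0]]).cuda()
cameras, lights = renderer.create_camera_from_cv(cam_R, cam_T)

# Two MANO-like meshes (e.g. left/right hand): verts (2, 1, 778, 3), faces (2, 1538, 3), RGBA colors (2, 4)
verts = torch.rand(2, 1, 778, 3).cuda()
faces = torch.randint(0, 778, (2, 1538, 3)).cuda()
colors = torch.tensor([0.207, 0.596, 0.792, 1.0]).unsqueeze(0).repeat(2, 1).cuda()

image, mask = renderer.render_multiple(verts, faces, colors, cameras, lights)
print(image.shape, image.dtype)                     # (H, W, 3) uint8 frame, ready for a video writer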
pre-requirements.txt
CHANGED
@@ -34,4 +34,5 @@ easydict
 loguru
 dill
 lapx
-moderngl-window==2.4.6
+moderngl-window==2.4.6
+imageio[ffmpeg]
scripts/scripts_test_video/hawor_video.py
CHANGED
@@ -65,6 +65,11 @@ def hawor_motion_estimation(args, start_idx, end_idx, seq_folder):
 
     tid = np.array([tr for tr in tracks])
 
+    if os.path.exists(f'{seq_folder}/tracks_{start_idx}_{end_idx}/frame_chunks_all.npy'):
+        print("skip hawor motion estimation")
+        frame_chunks_all = joblib.load(f'{seq_folder}/tracks_{start_idx}_{end_idx}/frame_chunks_all.npy')
+        return frame_chunks_all, img_focal
+
     print(f'Running hawor on {video} ...')
 
     left_trk = []
@@ -211,6 +216,7 @@ def hawor_motion_estimation(args, start_idx, end_idx, seq_folder):
 
     model_masks = model_masks > 0 # bool
     np.save(f'{seq_folder}/tracks_{start_idx}_{end_idx}/model_masks.npy', model_masks)
+    joblib.dump(frame_chunks_all, f'{seq_folder}/tracks_{start_idx}_{end_idx}/frame_chunks_all.npy')
     return frame_chunks_all, img_focal
 
 def hawor_infiller(args, start_idx, end_idx, frame_chunks_all):
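The change above follows a simple cache-to-disk pattern: dump the expensive result once, and short-circuit on the next run if the file already exists. A standalone sketch of the same joblib round-trip (the path and payload here are illustrative only, not the commit's actual data):

# Hypothetical sketch of the caching pattern used above.
import os
import joblib

cache_path = "cache/frame_chunks_all.npy"        # joblib accepts any file extension

def expensive_motion_estimation():
    return {"chunk_0": [1, 2, 3]}                # stand-in for the real result

if os.path.exists(cache_path):
    frame_chunks_all = joblib.load(cache_path)   # reuse the cached result
else:
    frame_chunks_all = expensive_motion_estimation()
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    joblib.dump(frame_chunks_all, cache_path)    # persist for the next run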