import os
import copy

import numpy as np
import torch
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
import matplotlib.pyplot as plt
import pandas as pd
def get_reconstructed_scene(model, device, image_size, filelist, schedule, niter, scenegraph_type, winsize, refid):
    """
    From a list of images, run dust3r inference and the global aligner.
    """
    imgs = load_images(filelist, size=image_size)
    if len(imgs) == 1:
        imgs = [imgs[0], copy.deepcopy(imgs[0])]
        imgs[1]['idx'] = 1
    if scenegraph_type == "swin":
        scenegraph_type = scenegraph_type + "-" + str(winsize)  # e.g. "swin-3"
    elif scenegraph_type == "oneref":
        scenegraph_type = scenegraph_type + "-" + str(refid)    # e.g. "oneref-0"

    # Build all image pairs according to the scene graph.
    pairs = make_pairs(imgs, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)
    # Feed the image pairs into the dust3r model (batch_size is a module-level global).
    output = inference(pairs, model, device, batch_size=batch_size)
    # About `output`: view1/view2 are the two input images of a pair; pred1/pred2 are the
    # outputs of the two branches, each containing a pointmap and a confidence map.

    mode = GlobalAlignerMode.PointCloudOptimizer if len(imgs) > 2 else GlobalAlignerMode.PairViewer
    lr = 0.01
    scene = global_aligner(output, device=device, mode=mode)
    if mode == GlobalAlignerMode.PointCloudOptimizer:
        try:
            # "Global optimization" section of the paper: gradient descent on Eq. (5)
            # to estimate world-frame 3D points and the extrinsic matrices.
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
        except Exception as e:
            print(e)
            print('retrying with cpu')
            scene = global_aligner(output, device='cpu', mode=mode)
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)

    # Also build rgb, depth and confidence visualizations.
    # Depth is normalized by the max value over all images;
    # the jet colormap is applied to the confidence maps.
    rgbimg = scene.imgs
    depths = to_numpy(scene.get_depthmaps())  # depthmaps D, as in Eq. (1)
    confs = to_numpy([c for c in scene.im_conf])
    cmap = plt.get_cmap('jet')
    depths_max = max([d.max() for d in depths])
    depths = [d / depths_max for d in depths]        # normalize depths
    confs_max = max([d.max() for d in confs])
    confs = [cmap(d / confs_max) for d in confs]     # normalize, then colorize
    imgs = []
    for i in range(len(rgbimg)):
        imgs.append(rgbimg[i])
        imgs.append(rgb(depths[i]))
        imgs.append(rgb(confs[i]))
    return scene, rgbimg
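
# Minimal usage sketch (mirrors how run()/run_llff_SANeRF_HQ below call this;
# `my_images` is a placeholder list of image paths):
#
#   model = load_model("checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth", 'cuda')
#   scene, rgbimg = get_reconstructed_scene(
#       model=model, device='cuda', image_size=512, filelist=my_images,
#       schedule='cosine', niter=50, scenegraph_type="complete", winsize=1, refid=0)
#   poses = scene.get_im_poses()   # camera-to-world poses, one per image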
def show_mask(mask, ax, random_color=False):
    # Overlay a semi-transparent mask on the given matplotlib axis.
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)

def show_points(coords, labels, ax, marker_size=375):
    # Draw positive prompts in green and negative prompts in red.
    pos_points = coords[labels == 1]
    neg_points = coords[labels == 0]
    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
def seg(predictor, rgbimgs, masks, target_ind):
    print('SAM step...')
    fig, ax = plt.subplots(len(rgbimgs), 4, figsize=(20, 20))
    for i, img in enumerate(rgbimgs):
        predictor.set_image((img * 255).astype(np.uint8))
        h, w, c = img.shape
        # Two positive point prompts near the image center.
        input_point = np.array([
            [int(w / 2), int(h / 2)],
            [int(w / 2), int(h / 2) - 30],
        ])
        input_label = np.array([1, 1])
        m, scores, logits = predictor.predict(  # run SAM with the two center-point prompts
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )
        show_mask(m[0], ax[i][0], random_color=True)  # column 1: SAM mask from the two point prompts
        show_points(input_point, input_label, ax[i][0])
        ax[i][1].imshow(img)  # column 2: the original image
        ax[i][2].imshow(masks[i].detach().cpu().numpy())  # column 3: pixels whose confidence exceeds the threshold
        masks[i] = masks[i].detach().cpu().numpy() & m[0]
        ax[i][3].imshow(masks[i])  # column 4: intersection of columns 1 and 3
        masks[i] = m[0]  # note: the intersection above is for display only; the SAM mask is kept
    plt.savefig("masks.png")
    return masks  # per image: the SAM segmentation produced from the two point prompts
def rgb_to_grayscale(img):
    """Convert an RGB image to grayscale."""
    return np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])

def binarize_image(img, threshold=0):
    grayscale = rgb_to_grayscale(img)
    # Invert: pixels at or below the threshold become foreground (255).
    return (~(grayscale > threshold)).astype(np.uint8) * 255
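
# Sanity-check sketch of the binarization convention: with the default
# projection threshold used below (--thr 0.6), a white background pixel (1.0)
# maps to 0 and a dark splatted point maps to 255:
#
#   demo = np.array([[[1.0, 1.0, 1.0], [0.2, 0.2, 0.2]]])
#   binarize_image(demo, threshold=0.6)   # -> array([[  0, 255]], dtype=uint8)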
# by guoyansong
'''
Expected input: data/<dataset name>/<scene>, e.g. data/nerf_llff_data(NVOS-all)/horns
'''
def run(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)
    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1. Load the dataset.
    from load_nvos import load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(basedir=img_dir)

    from SAM import SamPredictor
    from SAM.build_sam import sam_model_registry
    sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
    sam.to(device=device)
    predictor = SamPredictor(sam)

    model = load_model(model_path, device)  # dust3r
    # load_images can take a list of images or a directory

    # 2. Run DUSt3R and global alignment to obtain pointmaps.
    scene, imgs = get_reconstructed_scene(
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )
    poses = scene.get_im_poses()  # camera-to-world (inverse of the extrinsics)
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3. Run SAM to obtain 2D masks.
    # For every image, SAM is prompted with two points at the target center.
    masks = seg(predictor, imgs, confidence_masks, target_ind)

    # 4. Lift the 2D masks to a 3D mask.
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # Intersect the SAM mask with the point cloud: keep only the target's 3D points.
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # Apply the same mask to the image to fetch per-point RGB colors
        # (pts3d_list and color_list stay in one-to-one correspondence).
        color_list.append(imgs[i][mask])
    # Concatenate everything into one point cloud of the target object
    # (the world-frame points estimated by gradient descent on Eq. (5)).
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # Convert to homogeneous coordinates (N, 4), last column set to 1.
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] is camera-to-world; invert it to map world -> camera.
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # Transform the point cloud into the camera frame.
    points_camera_coord = np.dot(points_3D, P.T)
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # Apply the intrinsics to go from the camera frame to pixel coordinates.
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # Dehomogenize: divide by depth to get Euclidean 2D coordinates.
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # Create an empty image (initialized to a white background).
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # (A rescaling/recentering of the 2D projections to fit the image was tried
    # here and is disabled; the raw projected coordinates are used as-is.)
    points_2D_scaled = points_2D
    # Splat the point cloud onto the image.
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # Keep only points that fall inside the image bounds.
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # Convert to a Pillow image with the right dtype.
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' = grayscale mode
    # Save the images.
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)

    # 5. Compute IoU and Acc.
    import evaluate
    """
    img_dir:   data/nerf_llff_data(NVOS-all)/horns
    outdir:    output/nerf_llff_data(NVOS-all)/horns
    mask_path: output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:   data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no mask image found under " + gt_dir
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
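
# The world -> camera -> pixel chain inside run() can be factored into a small
# helper. This is an illustrative sketch of the same math (not called by the
# pipeline); `pose_c2w` is assumed to be a 4x4 camera-to-world matrix and `K` a
# 3x3 intrinsics matrix, matching scene.get_im_poses()/scene.get_intrinsics().
def project_world_points(points_3D, pose_c2w, K):
    """Project (N, 3) world-space points to (N, 2) pixel coordinates."""
    # Homogeneous coordinates (N, 4).
    pts_h = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # World -> camera: invert the camera-to-world pose.
    pts_cam = pts_h @ np.linalg.inv(pose_c2w).T
    # Camera -> pixel plane: apply the intrinsics, then divide by depth.
    pts_proj = pts_cam[:, :3] @ K.T
    return pts_proj[:, :2] / pts_proj[:, 2:3]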
# gys: read all ground-truth masks from the mask folder; SAM is no longer
# needed to generate masks.
def get_gt_masks(folder_path):
    from dust3r.utils.image import load_images, rgb
    imgs_mask = load_images(folder_path, 512)
    # Collect one boolean mask per image.
    bool_masks = []
    for mask in imgs_mask:
        image_array = mask['img'].squeeze(0).numpy()
        # Convert RGB to grayscale with the usual weights:
        # Y = 0.299*R + 0.587*G + 0.114*B
        gray_image = 0.299 * image_array[0] + 0.587 * image_array[1] + 0.114 * image_array[2]
        # Threshold into a boolean array (foreground True, background False).
        bool_array = gray_image > 0
        bool_masks.append(bool_array)
    print(f"Total number of mask images processed: {len(bool_masks)}")
    return bool_masks
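
# Usage sketch (the directory layout is an assumption mirroring
# run_llff_SANeRF_HQ below): masks load at the same 512 working resolution as
# the RGB images, so they index the pixel-aligned pointmaps directly:
#
#   bool_masks = get_gt_masks("data/llff(sanerf-hq)/cecread/gt_masks")
#   target_pts = pts3d[0][bool_masks[0]]   # 3D points under the first mask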
# by guoyansong
'''
Expected input: data/llff(sanerf-hq)/<scene>, e.g. data/llff(sanerf-hq)/cecread.
No manual prompts or SAM here: the 3D mask is built directly from each image's
ground-truth mask.
'''
def run_llff_SANeRF_HQ(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)
    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1. Load the dataset.
    from load_nvos import load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(basedir=img_dir)

    model = load_model(model_path, device)  # dust3r
    # load_images can take a list of images or a directory

    # 2. Run DUSt3R and global alignment to obtain pointmaps.
    scene, imgs = get_reconstructed_scene(
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )
    poses = scene.get_im_poses()  # camera-to-world (inverse of the extrinsics)
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3. Load the 2D masks.
    # The gt masks are read straight from the mask folder; SAM is not used here.
    # masks = seg(predictor, imgs, confidence_masks, target_ind)
    gt_masks_dir = os.path.join("data", dataset_name, scene_name, "gt_masks")
    masks = get_gt_masks(gt_masks_dir)

    # 4. Lift the 2D masks to a 3D mask.
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # The masked variant (background removed) would be pts3d[i][mask] and
        # imgs[i][mask]; currently all points and colors are kept.
        # pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        pts3d_list.append(pts3d[i].detach().cpu().numpy())
        # color_list.append(imgs[i][mask])
        color_list.append(imgs[i])
    # Concatenate everything into one point cloud
    # (the world-frame points estimated by gradient descent on Eq. (5)).
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # Convert to homogeneous coordinates (N, 4), last column set to 1.
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] is camera-to-world; invert it to map world -> camera.
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # Transform the point cloud into the camera frame.
    points_camera_coord = np.dot(points_3D, P.T)
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # Apply the intrinsics to go from the camera frame to pixel coordinates.
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # Dehomogenize: divide by depth to get Euclidean 2D coordinates.
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # Create an empty image (initialized to a white background).
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # (A rescaling/recentering of the 2D projections to fit the image was tried
    # here and is disabled; the raw projected coordinates are used as-is.)
    points_2D_scaled = points_2D
    # Splat the point cloud onto the image.
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # Keep only points that fall inside the image bounds.
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # Convert to a Pillow image with the right dtype.
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' = grayscale mode
    # Save the images.
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)

    # 5. Compute IoU and Acc.
    import evaluate
    """
    img_dir:   data/nerf_llff_data(NVOS-all)/horns
    outdir:    output/nerf_llff_data(NVOS-all)/horns
    mask_path: output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:   data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no mask image found under " + gt_dir
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
# Append each scene's IoU and Acc to an Excel sheet so that mIoU and mAcc can
# be computed across scenes.
def append_to_excel(file_path, scene, mIoU, mAcc):
    # A one-row DataFrame holding the new result.
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)
    if not os.path.exists(file_path):
        # File does not exist yet: create it and write the data.
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # File exists: read it and append the new row.
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)
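
# Usage sketch: once every scene has been appended, the means over all scenes
# can be read back from the same sheet (column names as written above;
# `dataset_name` stands for the dataset folder being evaluated):
#
#   df = pd.read_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'))
#   print(df['mIoU'].astype(float).mean(), df['mAcc'].astype(float).mean())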
batch_size = 1
schedule = 'cosine'
lr = 0.01
niter = 50  # global-alignment iterations; originally 300, reduced to 50 to fit local memory
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--img-dir', type=str, default="data/nerf_llff_data(NVOS-all)")
    parser.add_argument('--model-path', type=str, default="checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
    parser.add_argument('--sam-model', type=str, default="vit_b")
    parser.add_argument('--sam-checkpoint', type=str, default="checkpoints/sam_vit_b_01ec64.pth")
    parser.add_argument('--outdir', type=str, default="output/eval/horns")
    parser.add_argument('--thr', type=float, default=0.6)
    args = parser.parse_args()

    #### Evaluate the llff(sanerf-hq) scenes
    # run_llff_SANeRF_HQ("data/nerf_llff_data(NVOS-all)/trex")
    for f in os.listdir(args.img_dir):
        scene_path = os.path.join(args.img_dir, f).replace('\\', '/')
        if os.path.isdir(scene_path) and f not in ("horns", "masks", "reference_image", "scribbles"):
            run_llff_SANeRF_HQ(scene_path)