import os
import copy
import numpy as np
import torch
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from scipy.spatial.transform import Rotation
import matplotlib.pyplot as plt
import pandas as pd
def get_reconstructed_scene(model, device, image_size, filelist, schedule, niter, scenegraph_type, winsize, refid):
    """
    From a list of images, run DUSt3R inference and global alignment; return the scene and the per-view RGB images.
    """
imgs = load_images(filelist, size=image_size)
if len(imgs) == 1:
imgs = [imgs[0], copy.deepcopy(imgs[0])]
imgs[1]['idx'] = 1
if scenegraph_type == "swin":
scenegraph_type = scenegraph_type + "-" + str(winsize)
elif scenegraph_type == "oneref":
scenegraph_type = scenegraph_type + "-" + str(refid)
    # pair up the input images
    pairs = make_pairs(imgs, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)
    output = inference(pairs, model, device, batch_size=batch_size)  # run each image pair through the DUSt3R model
    # output: view1 / view2 are the two images of each pair; pred1 / pred2 are the two branch outputs, each holding a pointmap and a confidence map
mode = GlobalAlignerMode.PointCloudOptimizer if len(imgs) > 2 else GlobalAlignerMode.PairViewer
lr = 0.01
if mode == GlobalAlignerMode.PointCloudOptimizer:
        try:  # scene: PointCloudOptimizer
            scene = global_aligner(output, device=device, mode=mode)
            # ========== Global optimization: gradient descent on Eq. (5) to estimate the world-frame 3D points and the extrinsics ==========
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
        except Exception as e:
            print(e)
            print('retrying with cpu')
            scene = global_aligner(output, device='cpu', mode=mode)
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
    else:
        # with only two views, PairViewer mode needs no optimization
        scene = global_aligner(output, device=device, mode=mode)
# also return rgb, depth and confidence imgs
# depth is normalized with the max value for all images
# we apply the jet colormap on the confidence maps
rgbimg = scene.imgs
    depths = to_numpy(scene.get_depthmaps())  # depth maps D, as in Eq. (1)
    confs = to_numpy([c for c in scene.im_conf])
    cmap = plt.get_cmap('jet')
    depths_max = max([d.max() for d in depths])  # maximum depth over all views
    depths = [d / depths_max for d in depths]  # normalize the depths
    confs_max = max([d.max() for d in confs])  # maximum confidence over all views
    confs = [cmap(d / confs_max) for d in confs]  # normalize and apply the jet colormap
imgs = []
for i in range(len(rgbimg)):
imgs.append(rgbimg[i])
imgs.append(rgb(depths[i]))
imgs.append(rgb(confs[i]))
return scene, rgbimg
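# Minimal usage sketch (illustrative only, with hypothetical image paths): after reconstruction,
# the scene object exposes the quantities used throughout this script.
#   scene, rgbimg = get_reconstructed_scene(model, 'cuda', 512, ["a.jpg", "b.jpg"],
#                                           schedule='cosine', niter=50,
#                                           scenegraph_type="complete", winsize=1, refid=0)
#   poses = scene.get_im_poses()   # camera-to-world 4x4 poses
#   K = scene.get_intrinsics()     # per-view 3x3 intrinsics
#   pts3d = scene.get_pts3d()      # per-view pointmaps in world coordinates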
def show_mask(mask, ax, random_color=False):
if random_color:
color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
else:
color = np.array([30/255, 144/255, 255/255, 0.6])
h, w = mask.shape[-2:]
mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
ax.imshow(mask_image)
def show_points(coords, labels, ax, marker_size=375):
pos_points = coords[labels==1]
neg_points = coords[labels==0]
ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
def seg(predictor, rgbimgs, masks, target_ind):
print('SAM step...')
# return masks
fig, ax = plt.subplots(len(rgbimgs), 4, figsize=(20, 20))
    for i, img in enumerate(rgbimgs):
predictor.set_image((img * 255).astype(np.uint8))
h,w,c = img.shape
input_point = np.array([
[int(w/2), int(h/2)],
[int(w/2), int(h/2)-30],
])
input_label = np.array([1,1])
        m, scores, logits = predictor.predict(  # prompt SAM with the two points at the image center
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )
        show_mask(m[0], ax[i][0], random_color=True)  # column 1: SAM's segmentation from the two point prompts
        show_points(input_point, input_label, ax[i][0])
        ax[i][1].imshow(img)  # column 2: the original image
        ax[i][2].imshow(masks[i].detach().cpu().numpy())  # column 3: pixels whose confidence exceeds the threshold
        masks[i] = masks[i].detach().cpu().numpy() & m[0]
        ax[i][3].imshow(masks[i])  # column 4: intersection of columns 1 and 3
        masks[i] = m[0]
plt.savefig("masks.png")
    return masks  # per image: the SAM mask produced from the two center-point prompts
def rgb_to_grayscale(img):
    """Convert an RGB image to grayscale."""
    return np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])
def binarize_image(img, threshold=0):
    # pixels brighter than the threshold become 0, the rest 255 (inverted binary mask)
    grayscale = rgb_to_grayscale(img)
    return (~(grayscale > threshold)).astype(np.uint8) * 255
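# A standalone sketch of the projection used in run() below (documentation only; nothing
# in this file calls it). It assumes `pose_c2w` is a 4x4 camera-to-world matrix and `K`
# a 3x3 intrinsics matrix, i.e. numpy arrays obtained from scene.get_im_poses() /
# scene.get_intrinsics() after .cpu().detach().numpy().
def project_points_to_pixels(points_xyz, pose_c2w, K):
    """Project (N, 3) world-space points to (N, 2) pixel coordinates of one view."""
    pts_h = np.hstack((points_xyz, np.ones((len(points_xyz), 1))))  # homogeneous (N, 4)
    world2cam = np.linalg.inv(pose_c2w)                             # world -> camera transform
    pts_cam = pts_h @ world2cam.T                                   # points in the camera frame
    pts_img = pts_cam[:, :3] @ K.T                                  # apply the pinhole intrinsics
    return pts_img[:, :2] / pts_img[:, 2:3]                         # perspective divide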
# by guoyansong
'''
Expected input: data/<dataset name>/<scene>, e.g. data/nerf_llff_data(NVOS-all)/horns
'''
def run(img_dir):
dataset_name = img_dir.split('/')[-2]
scene_name = img_dir.split('/')[-1]
outdir = os.path.join("output", dataset_name, scene_name)
model_path = args.model_path
device = 'cuda'
print("=============================================")
print(torch.cuda.is_available())
    # 1. ======== Load the dataset ========
from load_nvos import load_nvos_data_evaluate
target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
basedir=img_dir)
# print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
# print(target_pose)
from SAM import SamPredictor
from SAM.build_sam import sam_model_registry
sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
model = load_model(model_path, device) # dust3R
# load_images can take a list of images or a directory
    # 2. ======== Run DUSt3R and global alignment to obtain pointmaps ========
    scene, imgs = get_reconstructed_scene(  # run DUSt3R
model=model, device=device,
image_size=512, filelist=all_imgfiles, schedule=schedule,
niter=niter, scenegraph_type="complete", winsize=1, refid=0,
)
    poses = scene.get_im_poses()  # camera-to-world poses (inverse of the extrinsic matrices)
intrinsics = scene.get_intrinsics()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()
    # 3. ======== Get 2D masks from SAM ========
    # for each image, SAM's segmentation prompted with the two points at the target center
    masks = seg(predictor, imgs, confidence_masks, target_ind)
    # 4. ======== Lift the 2D masks to a 3D mask ========
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # keep only the target object's 3D points by applying the SAM mask to the pointmap
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # apply the same mask to the RGB image to fetch those points' pixel colors
        # (pts3d_list and color_list stay in one-to-one correspondence)
        color_list.append(imgs[i][mask])
    # concatenate everything into the target object's point cloud in world coordinates
    # (the points estimated by gradient descent on Eq. (5) in the paper)
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)
    # convert the point cloud to homogeneous coordinates (N, 4), last column set to 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] maps camera to world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # transform the point cloud into the target camera's coordinate frame
    points_camera_coord = np.dot(points_3D, P.T)
    # project with the intrinsics estimated for the target view
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # apply the intrinsic matrix to go from camera coordinates to pixel coordinates
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # perspective divide: homogeneous -> Euclidean image coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]
    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # no extra rescaling: the projected coordinates are already in the pixel frame
    points_2D_scaled = points_2D
    # splat the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
import cv2
image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image, making sure the dtype is correct
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' means grayscale mode
    # save the mask and the colored projection
os.makedirs(outdir, exist_ok=True)
pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)
    # 5. ======== Compute IoU and Acc ========
import evaluate
"""
img_dir: data/nerf_llff_data(NVOS-all)/horns
out_dir: output/horns/nerf_llff_data(NVOS-all)/horns
mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
gt_path: data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
"""
mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
gt_path = ""
for name in os.listdir(gt_dir):
if "_mask" in name:
gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no ground-truth mask image found under " + gt_dir
iou, acc = evaluate.get_eval(mask_path, gt_path)
append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
# gys: read all ground-truth masks from the mask folder; SAM is not needed to generate masks here
def get_gt_masks(folder_path):
    imgs_mask = load_images(folder_path, 512)
    # list of boolean masks, one per image
    bool_masks = []
    for mask in imgs_mask:
        image_array = mask['img'].squeeze(0).numpy()
        # convert RGB to grayscale with the usual weights: Y = 0.299*R + 0.587*G + 0.114*B
        gray_image = 0.299 * image_array[0] + 0.587 * image_array[1] + 0.114 * image_array[2]
        # binarize into a boolean array (foreground True, background False)
        bool_array = gray_image > 0
        bool_masks.append(bool_array)
    print(f"Total number of mask images processed: {len(bool_masks)}")
    return bool_masks
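# Example usage (hypothetical path, following the data/<dataset>/<scene>/gt_masks layout used below):
#   gt_masks = get_gt_masks("data/llff(sanerf-hq)/cecread/gt_masks")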
# by guoyansong
'''
Expected input path: data/llff(sanerf-hq)/<scene>, e.g. data/llff(sanerf-hq)/cecread
No manual prompts or SAM here; the 3D mask is built directly from each image's ground-truth mask.
'''
def run_llff_SANeRF_HQ(img_dir):
dataset_name = img_dir.split('/')[-2]
scene_name = img_dir.split('/')[-1]
outdir = os.path.join("output", dataset_name, scene_name)
model_path = args.model_path
device = 'cuda'
print("=============================================")
print(torch.cuda.is_available())
    # 1. ======== Load the dataset ========
from load_nvos import load_nvos_data_evaluate
target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
basedir=img_dir)
# print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
# print(target_pose)
# from SAM import SamPredictor
# from SAM.build_sam import sam_model_registry
# sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
# sam.to(device=device)
# predictor = SamPredictor(sam)
model = load_model(model_path, device) # dust3R
# load_images can take a list of images or a directory
    # 2. ======== Run DUSt3R and global alignment to obtain pointmaps ========
    scene, imgs = get_reconstructed_scene(  # run DUSt3R
model=model, device=device,
image_size=512, filelist=all_imgfiles, schedule=schedule,
niter=niter, scenegraph_type="complete", winsize=1, refid=0,
)
    poses = scene.get_im_poses()  # camera-to-world poses (inverse of the extrinsic matrices)
intrinsics = scene.get_intrinsics()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()
    # 3. ======== Load the 2D masks ========
    # instead of prompting SAM, read the ground-truth masks from the gt_masks folder
    # masks = seg(predictor, imgs, confidence_masks, target_ind)
    gt_masks_dir = os.path.join("data", dataset_name, scene_name, "gt_masks")
    masks = get_gt_masks(gt_masks_dir)
    # 4. ======== Lift the 2D masks to a 3D mask ========
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # the masked variants are left commented out; the full pointmaps and images are used here
        # pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        pts3d_list.append(pts3d[i].detach().cpu().numpy())
        # color_list.append(imgs[i][mask])
        color_list.append(imgs[i])
    # concatenate everything into one point cloud in world coordinates
    # (the points estimated by gradient descent on Eq. (5) in the paper)
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)
    # convert the point cloud to homogeneous coordinates (N, 4), last column set to 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] maps camera to world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # transform the point cloud into the target camera's coordinate frame
    points_camera_coord = np.dot(points_3D, P.T)
    # project with the intrinsics estimated for the target view
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # apply the intrinsic matrix to go from camera coordinates to pixel coordinates
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # perspective divide: homogeneous -> Euclidean image coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]
    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # no extra rescaling: the projected coordinates are already in the pixel frame
    points_2D_scaled = points_2D
    # splat the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
import cv2
image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image, making sure the dtype is correct
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' means grayscale mode
    # save the mask and the colored projection
os.makedirs(outdir, exist_ok=True)
pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)
    # 5. ======== Compute IoU and Acc ========
import evaluate
"""
img_dir: data/nerf_llff_data(NVOS-all)/horns
out_dir: output/horns/nerf_llff_data(NVOS-all)/horns
mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
gt_path: data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
"""
mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
gt_path = ""
for name in os.listdir(gt_dir):
if "_mask" in name:
gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no ground-truth mask image found under " + gt_dir
iou, acc = evaluate.get_eval(mask_path, gt_path)
append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
# append each scene's IoU and Acc to an Excel sheet so that mIoU and mAcc can be averaged later
def append_to_excel(file_path, scene, mIoU, mAcc):
    # new row holding this scene's results
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)
    # create the file on first use
    if not os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # otherwise read the existing sheet and append the new row
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)
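# Example usage (hypothetical values and path), and how the per-scene sheet can later be
# averaged into mIoU / mAcc with pandas:
#   append_to_excel("output/nerf_llff_data(NVOS-all)/iouAndAcc.xlsx", "horns", 0.87, 0.95)
#   df = pd.read_excel("output/nerf_llff_data(NVOS-all)/iouAndAcc.xlsx")
#   print(df[["mIoU", "mAcc"]].astype(float).mean())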
batch_size = 1
schedule = 'cosine'
lr = 0.01
niter = 50  # number of global-alignment iterations; originally 300, reduced to 50 because 300 exhausted memory on this machine
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--img-dir', type=str, default="data/nerf_llff_data(NVOS-all)")
parser.add_argument('--model-path', type=str, default="checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
parser.add_argument('--sam-model', type=str, default="vit_b")
parser.add_argument('--sam-checkpoint', type=str, default="checkpoints/sam_vit_b_01ec64.pth")
parser.add_argument('--outdir', type=str, default="output/eval/horns")
parser.add_argument('--thr', type=float, default=0.6) # 0.6
args = parser.parse_args()
    #### test the cecread scene of the llff(sanerf-hq) dataset
    # run_llff_SANeRF_HQ("data/nerf_llff_data(NVOS-all)/trex")
    for f in os.listdir(args.img_dir):
        scene_path = os.path.join(args.img_dir, f).replace('\\', '/')
        if os.path.isdir(scene_path) and f not in ("horns", "masks", "reference_image", "scribbles"):
            run_llff_SANeRF_HQ(scene_path)