import re
import os
import copy

import numpy as np
import torch
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from scipy.spatial.transform import Rotation
import matplotlib.pyplot as plt
import pandas as pd


def get_reconstructed_scene(model, device, image_size, filelist, schedule, niter, scenegraph_type, winsize, refid):
    """
    From a list of images, run DUSt3R inference and the global aligner.
    """
    imgs = load_images(filelist, size=image_size)
    if len(imgs) == 1:
        imgs = [imgs[0], copy.deepcopy(imgs[0])]
        imgs[1]['idx'] = 1
    if scenegraph_type == "swin":
        scenegraph_type = scenegraph_type + "-" + str(winsize)
    elif scenegraph_type == "oneref":
        scenegraph_type = scenegraph_type + "-" + str(refid)

    # combine the images into pairs
    pairs = make_pairs(imgs, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)
    # feed the image pairs to the DUSt3R model (batch_size is a module-level global defined near the bottom of this file)
    # In `output`, view1/view2 are the two input images of each pair and pred1/pred2 are the outputs
    # of the two decoder branches; each pred contains a pointmap and a confidence map.
    output = inference(pairs, model, device, batch_size=batch_size)

    mode = GlobalAlignerMode.PointCloudOptimizer if len(imgs) > 2 else GlobalAlignerMode.PairViewer
    lr = 0.01

    try:
        # scene: PointCloudOptimizer (or PairViewer when only two images are given)
        scene = global_aligner(output, device=device, mode=mode)
        if mode == GlobalAlignerMode.PointCloudOptimizer:
            # "Global optimization" section of the paper: estimate the world-frame 3D points
            # and the extrinsic matrices by gradient descent on Eq. (5)
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
    except Exception as e:
        # fall back to CPU if the optimization fails on the GPU (e.g. out of memory)
        print(e)
        print('retrying with cpu')
        scene = global_aligner(output, device='cpu', mode=mode)
        if mode == GlobalAlignerMode.PointCloudOptimizer:
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)

    # also return rgb, depth and confidence imgs
    # depth is normalized with the max value for all images
    # we apply the jet colormap on the confidence maps
    rgbimg = scene.imgs
    depths = to_numpy(scene.get_depthmaps())  # depth maps D, i.e. the D in Eq. (1)
    confs = to_numpy([c for c in scene.im_conf])
    cmap = plt.get_cmap('jet')
    depths_max = max([d.max() for d in depths])  # maximum depth over all images
    depths = [d / depths_max for d in depths]    # normalize
    confs_max = max([d.max() for d in confs])    # maximum confidence over all images
    confs = [cmap(d / confs_max) for d in confs]  # normalize and colorize
    imgs = []
    for i in range(len(rgbimg)):
        imgs.append(rgbimg[i])
        imgs.append(rgb(depths[i]))
        imgs.append(rgb(confs[i]))
    return scene, rgbimg
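
# Minimal usage sketch for get_reconstructed_scene (illustrative only; the checkpoint path and
# image list below are placeholders, and the module-level globals `batch_size`, `schedule` and
# `niter` defined near the bottom of this file are assumed to be set):
#   model = load_model("checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth", 'cuda')
#   scene, rgbimg = get_reconstructed_scene(model, 'cuda', 512, ["img0.png", "img1.png"],
#                                           schedule='cosine', niter=50,
#                                           scenegraph_type="complete", winsize=1, refid=0)
#   poses = scene.get_im_poses()    # camera-to-world extrinsics, one 4x4 matrix per image
#   pts3d = scene.get_pts3d()       # per-pixel 3D points in the world frame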

def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)


def show_points(coords, labels, ax, marker_size=375):
    pos_points = coords[labels == 1]
    neg_points = coords[labels == 0]
    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size,
               edgecolor='white', linewidth=1.25)
    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size,
               edgecolor='white', linewidth=1.25)


def seg(predictor, rgbimgs, masks, target_ind):
    print('SAM step...')
    fig, ax = plt.subplots(len(rgbimgs), 4, figsize=(20, 20))
    for i, img in enumerate(rgbimgs):
        predictor.set_image((img * 255).astype(np.uint8))
        h, w, c = img.shape
        # prompt SAM with two points near the image centre
        input_point = np.array([
            [int(w / 2), int(h / 2)],
            [int(w / 2), int(h / 2) - 30],
        ])
        input_label = np.array([1, 1])
        m, scores, logits = predictor.predict(
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )
        # column 1: the SAM segmentation produced from the two point prompts
        show_mask(m[0], ax[i][0], random_color=True)
        show_points(input_point, input_label, ax[i][0])
        # column 2: the original image
        ax[i][1].imshow(img)
        # column 3: the pixels whose confidence exceeds the threshold
        ax[i][2].imshow(masks[i].detach().cpu().numpy())
        # column 4: intersection of the confidence mask and the SAM mask
        masks[i] = masks[i].detach().cpu().numpy() & m[0]
        ax[i][3].imshow(masks[i])
        masks[i] = m[0]
    plt.savefig("masks.png")
    # return, for every image, the SAM segmentation produced from the two point prompts
    return masks


def rgb_to_grayscale(img):
    """Convert an RGB image to grayscale."""
    return np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])


def binarize_image(img, threshold=0):
    # pixels at or below the grayscale threshold become foreground (255)
    grayscale = rgb_to_grayscale(img)
    return (~(grayscale > threshold)).astype(np.uint8) * 255


# by guoyansong
'''
Expected input: data/<dataset name>/<scene>, e.g. data/nerf_llff_data(NVOS-all)/horns
'''
def run(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)
    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1. ======================= load the dataset =======================
    from load_nvos import load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(basedir=img_dir)

    from SAM import SamPredictor
    from SAM.build_sam import sam_model_registry
    sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
    sam.to(device=device)
    predictor = SamPredictor(sam)

    model = load_model(model_path, device)  # DUSt3R
    # load_images can take a list of images or a directory
    # 2. ========== run DUSt3R and global alignment to get the pointmaps ==========
    scene, imgs = get_reconstructed_scene(
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )

    poses = scene.get_im_poses()        # camera-to-world (inverse of the extrinsic matrix)
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3. =================== run SAM to get the 2D masks ===================
    # for every image, SAM is prompted with two points at the target centre
    masks = seg(predictor, imgs, confidence_masks, target_ind)

    # 4. =============== lift the 2D masks to a 3D mask ===============
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # keep only the 3D points of the target object (SAM mask applied to the pointmap)
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # keep the corresponding pixel colours so the 3D points can be coloured
        # (pts3d_list and color_list stay aligned point by point)
        color_list.append(imgs[i][mask])
    # concatenate everything: the world-frame point cloud of the target object
    # (the points estimated by gradient descent on Eq. (5) of the paper)
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # convert the point cloud to homogeneous coordinates (N, 4), last column = 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] is camera-to-world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World 2 Camera:{P}')

    # transform the point cloud into the camera frame
    points_camera_coord = np.dot(points_3D, P.T)
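    # Projection recap (a sketch of what the matrix products around this point compute,
    # using the variables above; K denotes the 3x3 intrinsics matrix used just below):
    #   X_cam          = P @ X_world        # P: 4x4 world-to-camera transform
    #   [u', v', w']^T = K @ X_cam[:3]      # apply the pinhole intrinsics
    #   (u, v)         = (u'/w', v'/w')     # divide by depth to get pixel coordinates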
    # apply the intrinsics to go from camera coordinates to pixel coordinates
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # homogeneous -> Cartesian pixel coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # (optional rescaling/centring of the projected points was considered but is not applied)
    points_2D_scaled = points_2D

    # draw the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color  # set the pixel colour

    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image with the correct dtype
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' = grayscale mode

    # save the images
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)

    # 5. ======================= compute IoU and Acc =======================
    import evaluate
    """
    img_dir:   data/nerf_llff_data(NVOS-all)/horns
    out_dir:   output/nerf_llff_data(NVOS-all)/horns
    mask_path: output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:   data/nerf_llff_data(NVOS-all)/masks/horns/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no mask image found under " + gt_dir
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)


# by guoyansong
# Read all ground-truth masks from the mask folder; SAM is no longer needed to generate masks.
def get_gt_masks(folder_path):
    imgs_mask = load_images(folder_path, 512)
    # list of boolean masks
    bool_masks = []
    for mask in imgs_mask:
        image_array = mask['img'].squeeze(0).numpy()
        # convert RGB to grayscale with the usual weights: Y = 0.299*R + 0.587*G + 0.114*B
        gray_image = 0.299 * image_array[0] + 0.587 * image_array[1] + 0.114 * image_array[2]
        # binarize: foreground = True, background = False
        bool_array = gray_image > 0
        bool_masks.append(bool_array)
    print(f"Total number of mask images processed: {len(bool_masks)}")
    return bool_masks
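
# Illustrative sketch only: a minimal IoU / pixel-accuracy computation for two boolean masks
# of the same shape, approximating what the external evaluate.get_eval(mask_path, gt_path)
# call in run() / run_llff_SANeRF_HQ() is expected to return. The real evaluate module may
# load, resize and threshold the mask images differently; this helper is not called by the pipeline.
def iou_and_acc_sketch(pred_mask, gt_mask):
    pred = np.asarray(pred_mask, dtype=bool)
    gt = np.asarray(gt_mask, dtype=bool)
    intersection = np.logical_and(pred, gt).sum()
    union = np.logical_or(pred, gt).sum()
    iou = intersection / union if union > 0 else 0.0
    acc = (pred == gt).mean()  # fraction of pixels labelled correctly
    return iou, acc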

# by guoyansong
'''
Expected input: data/llff(sanerf-hq)/<scene>, e.g. data/llff(sanerf-hq)/cecread
No manual prompts or SAM are used here; the 3D mask is obtained directly from each
image's ground-truth mask.
'''
def run_llff_SANeRF_HQ(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)
    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1. ======================= load the dataset =======================
    from load_nvos import load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(basedir=img_dir)

    model = load_model(model_path, device)  # DUSt3R
    # load_images can take a list of images or a directory
    # 2. ========== run DUSt3R and global alignment to get the pointmaps ==========
    scene, imgs = get_reconstructed_scene(
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )

    poses = scene.get_im_poses()        # camera-to-world (inverse of the extrinsic matrix)
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3. ========== read the ground-truth 2D masks (instead of running SAM) ==========
    gt_masks_dir = os.path.join("data", dataset_name, scene_name, "gt_masks")
    masks = get_gt_masks(gt_masks_dir)

    # 4. =============== lift the 2D masks to a 3D mask ===============
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # keep only the 3D points of the target object (gt mask applied to the pointmap)
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # keep the corresponding pixel colours (pts3d_list and color_list stay aligned)
        color_list.append(imgs[i][mask])
    # concatenate everything: the world-frame point cloud of the target object
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # convert the point cloud to homogeneous coordinates (N, 4), last column = 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] is camera-to-world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World 2 Camera:{P}')

    # transform the point cloud into the camera frame
    points_camera_coord = np.dot(points_3D, P.T)

    # apply the intrinsics to go from camera coordinates to pixel coordinates
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # homogeneous -> Cartesian pixel coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # (optional rescaling/centring of the projected points was considered but is not applied)
    points_2D_scaled = points_2D

    # draw the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color  # set the pixel colour
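    # The projected points only cover scattered pixels, so the image is next resized to the
    # ground-truth mask resolution and thresholded in grayscale (binarize_image) to turn it
    # into a binary mask that can be compared against the ground truth.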
    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image with the correct dtype
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' = grayscale mode

    # save the images
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)

    # 5. ======================= compute IoU and Acc =======================
    import evaluate
    """
    img_dir:   data/nerf_llff_data(NVOS-all)/horns
    out_dir:   output/nerf_llff_data(NVOS-all)/horns
    mask_path: output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:   data/nerf_llff_data(NVOS-all)/masks/horns/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no mask image found under " + gt_dir
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)


# Append every scene's IoU and Acc to an Excel sheet so mIoU and mAcc can be computed later.
def append_to_excel(file_path, scene, mIoU, mAcc):
    # build a DataFrame holding the new row
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)

    if not os.path.exists(file_path):
        # the file does not exist yet: create it and write the new row
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # the file exists: read it and append the new row
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)


batch_size = 1
schedule = 'cosine'
lr = 0.01
niter = 50  # global-alignment iterations; reduced from 300 to 50 to avoid running out of memory on this machine

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--img-dir', type=str, default="data/nerf_llff_data(NVOS-all)")
    parser.add_argument('--model-path', type=str, default="checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
    parser.add_argument('--sam-model', type=str, default="vit_b")
    parser.add_argument('--sam-checkpoint', type=str, default="checkpoints/sam_vit_b_01ec64.pth")
    parser.add_argument('--outdir', type=str, default="output/eval/horns")
    parser.add_argument('--thr', type=float, default=0.6)
    args = parser.parse_args()

    # test the llff(sanerf-hq) scenes
    # run_llff_SANeRF_HQ("data/nerf_llff_data(NVOS-all)/trex")
    for f in os.listdir(args.img_dir):
        scene_path = os.path.join(args.img_dir, f).replace('\\', '/')
        if (os.path.isdir(scene_path) and f != "horns" and f != "masks"
                and f != "reference_image" and f != "scribbles"):
            run_llff_SANeRF_HQ(scene_path)
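
# Usage sketch (paths follow the argparse defaults above; the script name and checkpoint
# locations are placeholders, so adjust them to your setup):
#   python main.py --img-dir "data/nerf_llff_data(NVOS-all)" \
#       --model-path checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth \
#       --sam-model vit_b --sam-checkpoint checkpoints/sam_vit_b_01ec64.pth --thr 0.6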