Spaces:

yansong1616
/

Our3D

Paused

File size: 29,773 Bytes

b177539

import re
import os
import copy
from threading import enumerate
import numpy as np
import torch
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from scipy.spatial.transform import Rotation
import matplotlib.pyplot as plt
import pandas as pd

def get_reconstructed_scene(model, device, image_size, filelist, schedule, niter, scenegraph_type, winsize, refid):
    """
    from a list of images, run dust3r inference, global aligner.
    """
    imgs = load_images(filelist, size=image_size)
    if len(imgs) == 1:
        imgs = [imgs[0], copy.deepcopy(imgs[0])]
        imgs[1]['idx'] = 1
    if scenegraph_type == "swin":
        scenegraph_type = scenegraph_type + "-" + str(winsize)
    elif scenegraph_type == "oneref":
        scenegraph_type = scenegraph_type + "-" + str(refid)
    # 图片两两组合
    pairs = make_pairs(imgs, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)
    output = inference(pairs, model, device, batch_size=batch_size) # 将输入的图片两两成对输入dust3r模型
    #关于output：view1、view2分别表示输入模型的两张图片，pred1、pred2分别表示两个分支的输出结果，pred包含pointmap和confidence两个结果
    mode = GlobalAlignerMode.PointCloudOptimizer if len(imgs) > 2 else GlobalAlignerMode.PairViewer
    lr = 0.01

    if mode == GlobalAlignerMode.PointCloudOptimizer:
        try: # scene：PointCloudOptimizer
            scene = global_aligner(output, device=device, mode=mode)
            # ==========Golbal optimization章节，根据公式(5)梯度下降估算世界坐标系下的三维点和外参矩阵=============
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
        except Exception as e:# 论文中的Golbal optimization章节，根据公式(5)梯度下降估算世界坐标系下的三维点和外参矩阵
            print(e)
            scene = global_aligner(output, device='cpu', mode=mode)
            print('retrying with cpu')
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)

    # also return rgb, depth and confidence imgs
    # depth is normalized with the max value for all images
    # we apply the jet colormap on the confidence maps
    rgbimg = scene.imgs
    depths = to_numpy(scene.get_depthmaps()) # 深度信息D，即公式(1)上的D
    confs = to_numpy([c for c in scene.im_conf])
    cmap = plt.get_cmap('jet')
    depths_max = max([d.max() for d in depths]) # 获取最大深度值
    depths = [d/depths_max for d in depths] # 归一化
    confs_max = max([d.max() for d in confs]) # 获取置信度最大值
    confs = [cmap(d/confs_max) for d in confs] # 归一化

    imgs = []
    for i in range(len(rgbimg)):
        imgs.append(rgbimg[i])
        imgs.append(rgb(depths[i]))
        imgs.append(rgb(confs[i]))

    return scene, rgbimg


def show_mask(mask, ax, random_color=False):
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)
    
def show_points(coords, labels, ax, marker_size=375):
    pos_points = coords[labels==1]
    neg_points = coords[labels==0]
    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)   

def seg(predictor, rgbimgs, masks, target_ind):
    print('SAM step...')
    # return masks
    fig, ax = plt.subplots(len(rgbimgs), 4, figsize=(20, 20))
    for i, img in zip(range(len(rgbimgs)),rgbimgs):
        predictor.set_image((img * 255).astype(np.uint8))
        h,w,c = img.shape
        input_point = np.array([
            [int(w/2), int(h/2)],
            [int(w/2), int(h/2)-30],
        ])
        input_label = np.array([1,1])
        m, scores, logits = predictor.predict( # 输入SAM，提示信息是中心的两个point提示
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )
        show_mask(m[0], ax[i][0], random_color=True) # 第一列即m[0]展示的是SAM以上面两个point为提示信息输出的分割结果
        show_points(input_point, input_label, ax[i][0])
        ax[i][1].imshow(img) # 第二列展示的是原图
        ax[i][2].imshow(masks[i].detach().cpu().numpy()) # 第三列展示的是置信度大于阈值的像素点
        masks[i] = masks[i].detach().cpu().numpy() & m[0]
        ax[i][3].imshow(masks[i]) # 第四列是一、三列分割图的交集
        masks[i] = m[0]
    plt.savefig("masks.png")
    return masks  # 返回的是针对每张图片，SAM以上面两个point为提示信息输出的分割结果

def rgb_to_grayscale(img):
    """将RGB图像转换为灰度图。"""
    return np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])

def binarize_image(img, threshold=0):
    grayscale = rgb_to_grayscale(img)
    return (~(grayscale > threshold)).astype(np.uint8) * 255

# by guoyansong
'''
接受的输入是：data/数据集名称/scene，如data/nerf_llff_data(NVOS-all)/horns
'''
def run(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)


    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1、===============================加载数据集==============================
    from load_nvos import  load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
        basedir=img_dir)
    # print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
    # print(target_pose)

    from SAM import SamPredictor
    from SAM.build_sam import sam_model_registry
    sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
    sam.to(device=device)
    predictor = SamPredictor(sam)

    model = load_model(model_path, device)  # dust3R
    # load_images can take a list of images or a directory

    # 2、==============调用DUST3R和Global Alignment获取pointmaps====================
    scene, imgs = get_reconstructed_scene(  # 调用DUST3R
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )

    poses = scene.get_im_poses()  # cam to world 外参数矩阵的逆
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3、===============================调用SAM获取2D masks==========================
    # 这里返回的是针对每张图片，SAM以目标中心的两个point为提示信息输出的分割结果
    masks = seg(predictor, imgs, confidence_masks, target_ind)

    # 4、==============================基于2D masks获取3D masks=====================
    pts3d_list = []
    color_list = []
    for i, mask in zip(range(len(masks)), masks):
        # 将SAM分割的结果和三维点云融合，即去除背景只剩下目标物体的三维点
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # 将SAM分割的结果和原图融合，即去除背景，这里是为了取出原图中二维点像素值，给上面的三维点染色（pts3d_list和color_list的点是一一对应的）
        color_list.append(imgs[i][mask])
    # 将所有的三维点连接在一起，即全部绘制出来表示目标物体的三维点云（即论文中的公式(5)经过梯度下降计算出的世界坐标系下的点）
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # 将点云数据转换为齐次坐标形式 (N, 4)，最后一列为1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # 实际中，P 应根据你的相机参数来设置
    # P = np.eye(4)  # 假设为单位矩阵，即无旋转无平移
    # P = poses[target_ind].cpu().detach().numpy()
    # P是从相机到世界的变换矩阵，我们需要它的逆来从世界变换到相机坐标系
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'Word 2 Camera:{P}')
    # P = np.eye(4)
    # P[:3,:4] = target_pose[:,:4]

    # 变换点云到相机坐标系
    points_camera_coord = np.dot(points_3D, P.T)
    # points_camera_coord = points_3D

    # 假设的投影矩阵，这里只是一个简化的示例
    # 真实情况下，投影矩阵取决于相机的内参等因素
    # projection_matrix = np.array([[1, 0, 0, 0],
    #                             [0, 1, 0, 0],
    #                             [0, 0, 1, 0]])
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')

    # 应用内参矩阵由相机坐标系转成像素坐标系
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)

    # 从齐次坐标转换为笛卡尔坐标(欧式坐标)
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # 创建一个空的图像（初始化为白色背景）
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # 根据需要调整2D投影坐标的尺度和位置，以适应图像的尺寸
    # 这里简化处理，直接按比例缩放并居中
    # scale = min(W, H) / max(points_2D.max() - points_2D.min(), 1)
    # # offset = ((W - (points_2D[:, 0].max() - points_2D[:, 0].min()) * scale) / 2,
    # #         (H - (points_2D[:, 1].max() - points_2D[:, 1].min()) * scale) / 2)
    # points_2D_scaled = points_2D * scale + offset

    # 计算2D投影坐标的尺度和位置，以适应图像的尺寸
    # x_min, y_min = points_2D.min(axis=0)
    # x_max, y_max = points_2D.max(axis=0)
    # 保持横纵比不变的缩放
    # scale = min(W / (x_max - x_min), H / (y_max - y_min))

    # 缩放后的坐标
    # points_2D_scaled = (points_2D - [x_min, y_min]) * scale
    points_2D_scaled = points_2D

    # 在图像上绘制点云
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # 检查坐标是否在图像范围内
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color  # 设置像素颜色

    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # 转换为Pillow图像对象，确保数据类型正确
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L'代表灰度模式

    # 保存图像
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)


    # 5、======================================计算IoU和Acc==================================
    import evaluate
    """
    img_dir:  data/nerf_llff_data(NVOS-all)/horns
    out_dir:  output/horns/nerf_llff_data(NVOS-all)/horns
    mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:  data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)

    assert gt_path != "", os.path.join(outdir, "point_cloud_projection_mask.png") + "路径下没有mask图片"
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)


# gys：读取mask文件夹下的所有ground truth masks，不再需要经过SAM生成mask
def get_gt_masks(folder_path):
    from dust3r.utils.image import load_images, rgb
    imgs_mask = load_images(folder_path, 512)
    # 定义保存布尔mask的列表
    bool_masks = []

    for mask in imgs_mask:
        image_array = mask['img'].squeeze(0).numpy()
        # 将RGB图像转换为灰度图像
        # 使用简单的加权方法转换为灰度: Y = 0.299*R + 0.587*G + 0.114*B
        gray_image = 0.299 * image_array[0] + 0.587 * image_array[1] + 0.114 * image_array[2]

        # 将灰度图像转换为布尔数组（前景为True，背景为False）
        bool_array = gray_image > 0

        # 将布尔数组添加到列表中
        bool_masks.append(bool_array)

    # 输出布尔mask的数量
    print(f"Total number of mask images processed: {len(bool_masks)}")
    return bool_masks

# by guoyansong
'''
接受的输入路径是：data/llff(sanerf-hq)/cecread
不再经过人工提示和SAM生成mask，直接基于每个图片的gt.mask获取3D mask
'''
def run_llff_SANeRF_HQ(img_dir):
    dataset_name = img_dir.split('/')[-2]
    scene_name = img_dir.split('/')[-1]
    outdir = os.path.join("output", dataset_name, scene_name)


    model_path = args.model_path
    device = 'cuda'
    print("=============================================")
    print(torch.cuda.is_available())

    # 1、===============================加载数据集==============================
    from load_nvos import  load_nvos_data_evaluate
    target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
        basedir=img_dir)
    # print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
    # print(target_pose)

    # from SAM import SamPredictor
    # from SAM.build_sam import sam_model_registry
    # sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
    # sam.to(device=device)
    # predictor = SamPredictor(sam)

    model = load_model(model_path, device)  # dust3R
    # load_images can take a list of images or a directory

    # 2、==============调用DUST3R和Global Alignment获取pointmaps====================
    scene, imgs = get_reconstructed_scene(  # 调用DUST3R
        model=model, device=device,
        image_size=512, filelist=all_imgfiles, schedule=schedule,
        niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    )

    poses = scene.get_im_poses()  # cam to world 外参数矩阵的逆
    intrinsics = scene.get_intrinsics()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # 3、===============================调用SAM获取2D masks==========================
    # 这里返回的是针对每张图片，SAM以目标中心的两个point为提示信息输出的分割结果
    # TODO 直接读取mask文件夹下的gt. masks，不再需要SAM输出mask
    # masks = seg(predictor, imgs, confidence_masks, target_ind)
    gt_masks_dir = os.path.join("data", dataset_name, scene_name, "gt_masks")
    masks = get_gt_masks(gt_masks_dir)

    # 4、==============================基于2D masks获取3D masks=====================
    pts3d_list = []
    color_list = []
    for i, mask in zip(range(len(masks)), masks):
        # 将SAM分割的结果和三维点云融合，即去除背景只剩下目标物体的三维点
        # pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        pts3d_list.append(pts3d[i].detach().cpu().numpy())
        # # 将SAM分割的结果和原图融合，即去除背景，这里是为了取出原图中二维点像素值，给上面的三维点染色（pts3d_list和color_list的点是一一对应的）
        # color_list.append(imgs[i][mask])
        color_list.append(imgs[i])
    # 将所有的三维点连接在一起，即全部绘制出来表示目标物体的三维点云（即论文中的公式(5)经过梯度下降计算出的世界坐标系下的点）
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)

    # 将点云数据转换为齐次坐标形式 (N, 4)，最后一列为1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # 实际中，P 应根据你的相机参数来设置
    # P = np.eye(4)  # 假设为单位矩阵，即无旋转无平移
    # P = poses[target_ind].cpu().detach().numpy()
    # P是从相机到世界的变换矩阵，我们需要它的逆来从世界变换到相机坐标系
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'Word 2 Camera:{P}')
    # P = np.eye(4)
    # P[:3,:4] = target_pose[:,:4]

    # 变换点云到相机坐标系
    points_camera_coord = np.dot(points_3D, P.T)
    # points_camera_coord = points_3D

    # 假设的投影矩阵，这里只是一个简化的示例
    # 真实情况下，投影矩阵取决于相机的内参等因素
    # projection_matrix = np.array([[1, 0, 0, 0],
    #                             [0, 1, 0, 0],
    #                             [0, 0, 1, 0]])
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')

    # 应用内参矩阵由相机坐标系转成像素坐标系
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)

    # 从齐次坐标转换为笛卡尔坐标(欧式坐标)
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]

    # 创建一个空的图像（初始化为白色背景）
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # 根据需要调整2D投影坐标的尺度和位置，以适应图像的尺寸
    # 这里简化处理，直接按比例缩放并居中
    # scale = min(W, H) / max(points_2D.max() - points_2D.min(), 1)
    # # offset = ((W - (points_2D[:, 0].max() - points_2D[:, 0].min()) * scale) / 2,
    # #         (H - (points_2D[:, 1].max() - points_2D[:, 1].min()) * scale) / 2)
    # points_2D_scaled = points_2D * scale + offset

    # 计算2D投影坐标的尺度和位置，以适应图像的尺寸
    # x_min, y_min = points_2D.min(axis=0)
    # x_max, y_max = points_2D.max(axis=0)
    # 保持横纵比不变的缩放
    # scale = min(W / (x_max - x_min), H / (y_max - y_min))

    # 缩放后的坐标
    # points_2D_scaled = (points_2D - [x_min, y_min]) * scale
    points_2D_scaled = points_2D

    # 在图像上绘制点云
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # 检查坐标是否在图像范围内
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color  # 设置像素颜色

    import cv2
    image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
    binary_image = binarize_image(image, threshold=args.thr)
    # 转换为Pillow图像对象，确保数据类型正确
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L'代表灰度模式

    # 保存图像
    os.makedirs(outdir, exist_ok=True)
    pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
    plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)


    # 5、======================================计算IoU和Acc==================================
    import evaluate
    """
    img_dir:  data/nerf_llff_data(NVOS-all)/horns
    out_dir:  output/horns/nerf_llff_data(NVOS-all)/horns
    mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    gt_path:  data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
    """
    mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
    gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    gt_path = ""
    for name in os.listdir(gt_dir):
        if "_mask" in name:
            gt_path = os.path.join(gt_dir, name)

    assert gt_path != "", os.path.join(outdir, "point_cloud_projection_mask.png") + "路径下没有mask图片"
    iou, acc = evaluate.get_eval(mask_path, gt_path)
    append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)


# 将所有scene的iou和acc结果写入Excel，便于求mIoU和mAcc
def append_to_excel(file_path, scene, mIoU, mAcc):
    # 创建一个 DataFrame 保存新的数据
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)

    # 如果文件不存在，创建新的文件并写入数据
    if not os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # 如果文件存在，读取文件并追加新数据
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)

        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)


# 将所有scene的iou和acc结果写入Excel，便于求mIoU和mAcc
def append_to_excel(file_path, scene, mIoU, mAcc):
    # 创建一个 DataFrame 保存新的数据
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)

    # 如果文件不存在，创建新的文件并写入数据
    if not os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # 如果文件存在，读取文件并追加新数据
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)

        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)

batch_size = 1
schedule = 'cosine'
lr = 0.01
niter = 50 #global alignment中的迭代次数；初始值是300，本机内存爆炸，这里改小到50

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--img-dir', type=str, default="data/nerf_llff_data(NVOS-all)")
    parser.add_argument('--model-path', type=str, default="checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
    parser.add_argument('--sam-model', type=str, default="vit_b")
    parser.add_argument('--sam-checkpoint', type=str, default="checkpoints/sam_vit_b_01ec64.pth")
    parser.add_argument('--outdir', type=str, default="output/eval/horns")
    parser.add_argument('--thr', type=float, default=0.6) # 0.6
    args = parser.parse_args()


    #### 测试llff(sanerf-hq)数据集的cecread场景
    # run_llff_SANeRF_HQ("data/nerf_llff_data(NVOS-all)/trex")


    for f in os.listdir(args.img_dir):
        scene_path = os.path.join(args.img_dir, f).replace('\\', '/')
        if(os.path.isdir(scene_path) and f != "horns" and f != "masks" and f!="reference_image" and f != "scribbles"):
            run_llff_SANeRF_HQ(scene_path)





    # import evaluate
    #
    # """
    # img_dir:  data/nerf_llff_data(NVOS-all)/horns
    # out_dir:  output/horns/nerf_llff_data(NVOS-all)/horns
    # mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
    # gt_path:  data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
    # """
    # mask_path = os.path.join(out_dir, "point_cloud_projection_mask.png")
    # gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
    # gt_path = ""
    # for name in os.listdir(gt_dir):
    #     if "_mask" in name:
    #         gt_path = os.path.join(gt_dir, name)
    #
    # assert gt_path != "", os.path.join(out_dir, "point_cloud_projection_mask.png") + "路径下没有mask图片"
    # evaluate.get_eval(mask_path, gt_path)


    # model_path = args.model_path
    # outdir = args.outdir
    # device = 'cuda'
    # print("=============================================")
    # print(torch.cuda.is_available())

    # from load_nvos import load_nvos_data
    # ref_ind, ref_pose, target_ind, target_pose, target_mask, all_imgfiles, all_poses  = load_nvos_data(basedir=args.img_dir)
    # # print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
    # # print(target_pose)
    #
    # from SAM import SamPredictor
    # from SAM.build_sam import sam_model_registry
    # sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
    # sam.to(device=device)
    # predictor = SamPredictor(sam)
    #
    # batch_size = 1
    # schedule = 'cosine'
    # lr = 0.01
    # niter = 50 #300，本机内存爆炸，这里改小到50
    #
    # model = load_model(model_path, device) # dust3R
    # # load_images can take a list of images or a directory
    # scene, imgs = get_reconstructed_scene( # 调用DUST3R
    #     model=model, device=device,
    #     image_size=512, filelist=all_imgfiles, schedule=schedule,
    #     niter=niter, scenegraph_type="complete", winsize=1, refid=0,
    # )
    #
    # poses = scene.get_im_poses() # cam to world 外参数矩阵的逆
    # intrinsics = scene.get_intrinsics()
    # pts3d = scene.get_pts3d()
    # confidence_masks = scene.get_masks()
    #
    # # 这里返回的是针对每张图片，SAM以目标中心的两个point为提示信息输出的分割结果
    # masks = seg(predictor, imgs, confidence_masks, target_ind)
    # pts3d_list = []
    # color_list = []
    # for i, mask in zip(range(len(masks)),masks):
    #     # 将SAM分割的结果和三维点云融合，即去除背景只剩下目标物体的三维点
    #     pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
    #     # 将SAM分割的结果和原图融合，即去除背景，这里是为了取出原图中二维点像素值，给上面的三维点染色（pts3d_list和color_list的点是一一对应的）
    #     color_list.append(imgs[i][mask])
    # # 将所有的三维点连接在一起，即全部绘制出来表示目标物体的三维点云（即论文中的公式(5)经过梯度下降计算出的世界坐标系下的点）
    # points_3D = np.concatenate(pts3d_list).reshape(-1,3)
    # colors = np.concatenate(color_list).reshape(-1,3)
    #
    # # 将点云数据转换为齐次坐标形式 (N, 4)，最后一列为1
    # points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # # 实际中，P 应根据你的相机参数来设置
    # # P = np.eye(4)  # 假设为单位矩阵，即无旋转无平移
    # # P = poses[target_ind].cpu().detach().numpy()
    # # P是从相机到世界的变换矩阵，我们需要它的逆来从世界变换到相机坐标系
    # P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    # print(f'Word 2 Camera:{P}')
    # # P = np.eye(4)
    # # P[:3,:4] = target_pose[:,:4]
    #
    # # 变换点云到相机坐标系
    # points_camera_coord = np.dot(points_3D, P.T)
    # # points_camera_coord = points_3D
    #
    # # 假设的投影矩阵，这里只是一个简化的示例
    # # 真实情况下，投影矩阵取决于相机的内参等因素
    # # projection_matrix = np.array([[1, 0, 0, 0],
    # #                             [0, 1, 0, 0],
    # #                             [0, 0, 1, 0]])
    # projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    # print(f'intrinsics:{projection_matrix}')
    #
    # # 应用内参矩阵由相机坐标系转成像素坐标系
    # points_projected = np.dot(points_camera_coord[:,:3], projection_matrix.T)
    #
    # # 从齐次坐标转换为笛卡尔坐标(欧式坐标)
    # points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]
    #
    #
    # # 创建一个空的图像（初始化为白色背景）
    # image = np.ones(imgs[0].shape)
    # H,W,_ = image.shape
    # # 根据需要调整2D投影坐标的尺度和位置，以适应图像的尺寸
    # # 这里简化处理，直接按比例缩放并居中
    # # scale = min(W, H) / max(points_2D.max() - points_2D.min(), 1)
    # # # offset = ((W - (points_2D[:, 0].max() - points_2D[:, 0].min()) * scale) / 2,
    # # #         (H - (points_2D[:, 1].max() - points_2D[:, 1].min()) * scale) / 2)
    # # points_2D_scaled = points_2D * scale + offset
    #
    # # 计算2D投影坐标的尺度和位置，以适应图像的尺寸
    # #x_min, y_min = points_2D.min(axis=0)
    # #x_max, y_max = points_2D.max(axis=0)
    # # 保持横纵比不变的缩放
    # #scale = min(W / (x_max - x_min), H / (y_max - y_min))
    #
    # # 缩放后的坐标
    # #points_2D_scaled = (points_2D - [x_min, y_min]) * scale
    # points_2D_scaled = points_2D
    #
    # # 在图像上绘制点云
    # for point, color in zip(points_2D_scaled.astype(int), colors):
    #     x, y = point
    #     # 检查坐标是否在图像范围内
    #     if 0 <= x < W and 0 <= y < H:
    #         image[y, x] = color  # 设置像素颜色
    #
    # import cv2
    # image = cv2.resize(image, (target_mask.shape[1],target_mask.shape[0]))
    # binary_image = binarize_image(image, threshold=args.thr)
    # # 转换为Pillow图像对象，确保数据类型正确
    # from PIL import Image
    # pil_image = Image.fromarray(binary_image, 'L')  # 'L'代表灰度模式
    #
    # # 保存图像
    # os.makedirs(args.outdir, exist_ok=True)
    # pil_image.save(os.path.join(args.outdir,'point_cloud_projection_mask.png'))
    # plt.imsave(os.path.join(args.outdir,'point_cloud_projection.png'), image)