import os
import copy
import numpy as np
import torch
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from scipy.spatial.transform import Rotation
import matplotlib.pyplot as plt
import pandas as pd
def get_reconstructed_scene(model, device, image_size, filelist, schedule, niter, scenegraph_type, winsize, refid):
    """
    From a list of images, run DUSt3R inference and global alignment; return the scene and the per-view RGB images.
    """
imgs = load_images(filelist, size=image_size)
if len(imgs) == 1:
imgs = [imgs[0], copy.deepcopy(imgs[0])]
imgs[1]['idx'] = 1
if scenegraph_type == "swin":
scenegraph_type = scenegraph_type + "-" + str(winsize)
elif scenegraph_type == "oneref":
scenegraph_type = scenegraph_type + "-" + str(refid)
    # pair up the input images
    pairs = make_pairs(imgs, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)
    output = inference(pairs, model, device, batch_size=batch_size)  # run each image pair through the DUSt3R model
    # output: view1 / view2 are the two images of each pair; pred1 / pred2 are the two branch outputs, each holding a pointmap and a confidence map
mode = GlobalAlignerMode.PointCloudOptimizer if len(imgs) > 2 else GlobalAlignerMode.PairViewer
lr = 0.01
if mode == GlobalAlignerMode.PointCloudOptimizer:
        try:  # scene: PointCloudOptimizer
            scene = global_aligner(output, device=device, mode=mode)
            # ========== Global optimization: gradient descent on Eq. (5) to estimate the world-frame 3D points and the extrinsics ==========
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
        except Exception as e:
            print(e)
            print('retrying with cpu')
            scene = global_aligner(output, device='cpu', mode=mode)
            loss = scene.compute_global_alignment(init='mst', niter=niter, schedule=schedule, lr=lr)
    else:
        # with only two views, PairViewer mode needs no optimization
        scene = global_aligner(output, device=device, mode=mode)
# also return rgb, depth and confidence imgs
# depth is normalized with the max value for all images
# we apply the jet colormap on the confidence maps
rgbimg = scene.imgs
    depths = to_numpy(scene.get_depthmaps())  # depth maps D, as in Eq. (1)
    confs = to_numpy([c for c in scene.im_conf])
    cmap = plt.get_cmap('jet')
    depths_max = max([d.max() for d in depths])  # maximum depth over all views
    depths = [d / depths_max for d in depths]  # normalize the depths
    confs_max = max([d.max() for d in confs])  # maximum confidence over all views
    confs = [cmap(d / confs_max) for d in confs]  # normalize and apply the jet colormap
imgs = []
for i in range(len(rgbimg)):
imgs.append(rgbimg[i])
imgs.append(rgb(depths[i]))
imgs.append(rgb(confs[i]))
return scene, rgbimg
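# Minimal usage sketch (illustrative only, with hypothetical image paths): after reconstruction,
# the scene object exposes the quantities used throughout this script.
#   scene, rgbimg = get_reconstructed_scene(model, 'cuda', 512, ["a.jpg", "b.jpg"],
#                                           schedule='cosine', niter=50,
#                                           scenegraph_type="complete", winsize=1, refid=0)
#   poses = scene.get_im_poses()   # camera-to-world 4x4 poses
#   K = scene.get_intrinsics()     # per-view 3x3 intrinsics
#   pts3d = scene.get_pts3d()      # per-view pointmaps in world coordinates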
def show_mask(mask, ax, random_color=False):
if random_color:
color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
else:
color = np.array([30/255, 144/255, 255/255, 0.6])
h, w = mask.shape[-2:]
mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
ax.imshow(mask_image)
def show_points(coords, labels, ax, marker_size=375):
pos_points = coords[labels==1]
neg_points = coords[labels==0]
ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
def seg(predictor, rgbimgs, masks, target_ind):
print('SAM step...')
# return masks
fig, ax = plt.subplots(len(rgbimgs), 4, figsize=(20, 20))
    for i, img in enumerate(rgbimgs):
predictor.set_image((img * 255).astype(np.uint8))
h,w,c = img.shape
input_point = np.array([
[int(w/2), int(h/2)],
[int(w/2), int(h/2)-30],
])
input_label = np.array([1,1])
        m, scores, logits = predictor.predict(  # prompt SAM with the two points at the image center
            point_coords=input_point,
            point_labels=input_label,
            multimask_output=False,
        )
        show_mask(m[0], ax[i][0], random_color=True)  # column 1: SAM's segmentation from the two point prompts
        show_points(input_point, input_label, ax[i][0])
        ax[i][1].imshow(img)  # column 2: the original image
        ax[i][2].imshow(masks[i].detach().cpu().numpy())  # column 3: pixels whose confidence exceeds the threshold
        masks[i] = masks[i].detach().cpu().numpy() & m[0]
        ax[i][3].imshow(masks[i])  # column 4: intersection of columns 1 and 3
        masks[i] = m[0]
plt.savefig("masks.png")
    return masks  # per image: the SAM mask produced from the two center-point prompts
def rgb_to_grayscale(img):
    """Convert an RGB image to grayscale."""
    return np.dot(img[..., :3], [0.2989, 0.5870, 0.1140])
def binarize_image(img, threshold=0):
    # pixels brighter than the threshold become 0, the rest 255 (inverted binary mask)
    grayscale = rgb_to_grayscale(img)
    return (~(grayscale > threshold)).astype(np.uint8) * 255
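# A standalone sketch of the projection used in run() below (documentation only; nothing
# in this file calls it). It assumes `pose_c2w` is a 4x4 camera-to-world matrix and `K`
# a 3x3 intrinsics matrix, i.e. numpy arrays obtained from scene.get_im_poses() /
# scene.get_intrinsics() after .cpu().detach().numpy().
def project_points_to_pixels(points_xyz, pose_c2w, K):
    """Project (N, 3) world-space points to (N, 2) pixel coordinates of one view."""
    pts_h = np.hstack((points_xyz, np.ones((len(points_xyz), 1))))  # homogeneous (N, 4)
    world2cam = np.linalg.inv(pose_c2w)                             # world -> camera transform
    pts_cam = pts_h @ world2cam.T                                   # points in the camera frame
    pts_img = pts_cam[:, :3] @ K.T                                  # apply the pinhole intrinsics
    return pts_img[:, :2] / pts_img[:, 2:3]                         # perspective divide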
# by guoyansong
'''
Expected input: data/<dataset name>/<scene>, e.g. data/nerf_llff_data(NVOS-all)/horns
'''
def run(img_dir):
dataset_name = img_dir.split('/')[-2]
scene_name = img_dir.split('/')[-1]
outdir = os.path.join("output", dataset_name, scene_name)
model_path = args.model_path
device = 'cuda'
print("=============================================")
print(torch.cuda.is_available())
    # 1. ======== Load the dataset ========
from load_nvos import load_nvos_data_evaluate
target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
basedir=img_dir)
# print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
# print(target_pose)
from SAM import SamPredictor
from SAM.build_sam import sam_model_registry
sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
model = load_model(model_path, device) # dust3R
# load_images can take a list of images or a directory
    # 2. ======== Run DUSt3R and global alignment to obtain pointmaps ========
    scene, imgs = get_reconstructed_scene(  # run DUSt3R
model=model, device=device,
image_size=512, filelist=all_imgfiles, schedule=schedule,
niter=niter, scenegraph_type="complete", winsize=1, refid=0,
)
    poses = scene.get_im_poses()  # camera-to-world poses (inverse of the extrinsic matrices)
intrinsics = scene.get_intrinsics()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()
    # 3. ======== Get 2D masks from SAM ========
    # for each image, SAM's segmentation prompted with the two points at the target center
    masks = seg(predictor, imgs, confidence_masks, target_ind)
    # 4. ======== Lift the 2D masks to a 3D mask ========
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # keep only the target object's 3D points by applying the SAM mask to the pointmap
        pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        # apply the same mask to the RGB image to fetch those points' pixel colors
        # (pts3d_list and color_list stay in one-to-one correspondence)
        color_list.append(imgs[i][mask])
    # concatenate everything into the target object's point cloud in world coordinates
    # (the points estimated by gradient descent on Eq. (5) in the paper)
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)
    # convert the point cloud to homogeneous coordinates (N, 4), last column set to 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] maps camera to world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # transform the point cloud into the target camera's coordinate frame
    points_camera_coord = np.dot(points_3D, P.T)
    # project with the intrinsics estimated for the target view
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # apply the intrinsic matrix to go from camera coordinates to pixel coordinates
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # perspective divide: homogeneous -> Euclidean image coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]
    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # no extra rescaling: the projected coordinates are already in the pixel frame
    points_2D_scaled = points_2D
    # splat the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
import cv2
image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image, making sure the dtype is correct
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' means grayscale mode
    # save the mask and the colored projection
os.makedirs(outdir, exist_ok=True)
pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)
    # 5. ======== Compute IoU and Acc ========
import evaluate
"""
img_dir: data/nerf_llff_data(NVOS-all)/horns
out_dir: output/horns/nerf_llff_data(NVOS-all)/horns
mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
gt_path: data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
"""
mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
gt_path = ""
for name in os.listdir(gt_dir):
if "_mask" in name:
gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no ground-truth mask image found under " + gt_dir
iou, acc = evaluate.get_eval(mask_path, gt_path)
append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
# gys: read all ground-truth masks from the mask folder; SAM is not needed to generate masks here
def get_gt_masks(folder_path):
    imgs_mask = load_images(folder_path, 512)
    # list of boolean masks, one per image
    bool_masks = []
    for mask in imgs_mask:
        image_array = mask['img'].squeeze(0).numpy()
        # convert RGB to grayscale with the usual weights: Y = 0.299*R + 0.587*G + 0.114*B
        gray_image = 0.299 * image_array[0] + 0.587 * image_array[1] + 0.114 * image_array[2]
        # binarize into a boolean array (foreground True, background False)
        bool_array = gray_image > 0
        bool_masks.append(bool_array)
    print(f"Total number of mask images processed: {len(bool_masks)}")
    return bool_masks
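# Example usage (hypothetical path, following the data/<dataset>/<scene>/gt_masks layout used below):
#   gt_masks = get_gt_masks("data/llff(sanerf-hq)/cecread/gt_masks")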
# by guoyansong
'''
Expected input path: data/llff(sanerf-hq)/<scene>, e.g. data/llff(sanerf-hq)/cecread
No manual prompts or SAM here; the 3D mask is built directly from each image's ground-truth mask.
'''
def run_llff_SANeRF_HQ(img_dir):
dataset_name = img_dir.split('/')[-2]
scene_name = img_dir.split('/')[-1]
outdir = os.path.join("output", dataset_name, scene_name)
model_path = args.model_path
device = 'cuda'
print("=============================================")
print(torch.cuda.is_available())
    # 1. ======== Load the dataset ========
from load_nvos import load_nvos_data_evaluate
target_ind, target_mask, all_imgfiles = load_nvos_data_evaluate(
basedir=img_dir)
# print(ref_ind, ref_pose.shape, target_ind, all_imgfiles, all_poses.shape)
# print(target_pose)
# from SAM import SamPredictor
# from SAM.build_sam import sam_model_registry
# sam = sam_model_registry[args.sam_model](checkpoint=args.sam_checkpoint)
# sam.to(device=device)
# predictor = SamPredictor(sam)
model = load_model(model_path, device) # dust3R
# load_images can take a list of images or a directory
    # 2. ======== Run DUSt3R and global alignment to obtain pointmaps ========
    scene, imgs = get_reconstructed_scene(  # run DUSt3R
model=model, device=device,
image_size=512, filelist=all_imgfiles, schedule=schedule,
niter=niter, scenegraph_type="complete", winsize=1, refid=0,
)
    poses = scene.get_im_poses()  # camera-to-world poses (inverse of the extrinsic matrices)
intrinsics = scene.get_intrinsics()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()
    # 3. ======== Load the 2D masks ========
    # instead of prompting SAM, read the ground-truth masks from the gt_masks folder
    # masks = seg(predictor, imgs, confidence_masks, target_ind)
    gt_masks_dir = os.path.join("data", dataset_name, scene_name, "gt_masks")
    masks = get_gt_masks(gt_masks_dir)
    # 4. ======== Lift the 2D masks to a 3D mask ========
    pts3d_list = []
    color_list = []
    for i, mask in enumerate(masks):
        # the masked variants are left commented out; the full pointmaps and images are used here
        # pts3d_list.append(pts3d[i][mask].detach().cpu().numpy())
        pts3d_list.append(pts3d[i].detach().cpu().numpy())
        # color_list.append(imgs[i][mask])
        color_list.append(imgs[i])
    # concatenate everything into one point cloud in world coordinates
    # (the points estimated by gradient descent on Eq. (5) in the paper)
    points_3D = np.concatenate(pts3d_list).reshape(-1, 3)
    colors = np.concatenate(color_list).reshape(-1, 3)
    # convert the point cloud to homogeneous coordinates (N, 4), last column set to 1
    points_3D = np.hstack((points_3D, np.ones((len(points_3D), 1))))
    # poses[target_ind] maps camera to world; invert it to go from world to camera coordinates
    P = np.linalg.inv(poses[target_ind].cpu().detach().numpy())
    print(f'World to camera:{P}')
    # transform the point cloud into the target camera's coordinate frame
    points_camera_coord = np.dot(points_3D, P.T)
    # project with the intrinsics estimated for the target view
    projection_matrix = intrinsics[target_ind].cpu().detach().numpy()
    print(f'intrinsics:{projection_matrix}')
    # apply the intrinsic matrix to go from camera coordinates to pixel coordinates
    points_projected = np.dot(points_camera_coord[:, :3], projection_matrix.T)
    # perspective divide: homogeneous -> Euclidean image coordinates
    points_2D = points_projected[:, :2] / points_projected[:, 2, np.newaxis]
    # create an empty image (white background)
    image = np.ones(imgs[0].shape)
    H, W, _ = image.shape
    # no extra rescaling: the projected coordinates are already in the pixel frame
    points_2D_scaled = points_2D
    # splat the point cloud onto the image
    for point, color in zip(points_2D_scaled.astype(int), colors):
        x, y = point
        # only draw points that fall inside the image
        if 0 <= x < W and 0 <= y < H:
            image[y, x] = color
import cv2
image = cv2.resize(image, (target_mask.shape[1], target_mask.shape[0]))
binary_image = binarize_image(image, threshold=args.thr)
    # convert to a Pillow image, making sure the dtype is correct
    from PIL import Image
    pil_image = Image.fromarray(binary_image, 'L')  # 'L' means grayscale mode
    # save the mask and the colored projection
os.makedirs(outdir, exist_ok=True)
pil_image.save(os.path.join(outdir, 'point_cloud_projection_mask.png'))
plt.imsave(os.path.join(outdir, 'point_cloud_projection.png'), image)
    # 5. ======== Compute IoU and Acc ========
import evaluate
"""
img_dir: data/nerf_llff_data(NVOS-all)/horns
out_dir: output/horns/nerf_llff_data(NVOS-all)/horns
mask_path:output/nerf_llff_data(NVOS-all)/horns/point_cloud_projection_mask.png
gt_path: data/nerf_llff_data(NVOS-all)/masks/horns_center/DJI_20200223_163024_597_mask.png
"""
mask_path = os.path.join(outdir, "point_cloud_projection_mask.png")
gt_dir = os.path.join("data", dataset_name, "masks", scene_name)
gt_path = ""
for name in os.listdir(gt_dir):
if "_mask" in name:
gt_path = os.path.join(gt_dir, name)
    assert gt_path != "", "no ground-truth mask image found under " + gt_dir
iou, acc = evaluate.get_eval(mask_path, gt_path)
append_to_excel(os.path.join("output", dataset_name, 'iouAndAcc.xlsx'), scene_name, iou, acc)
# append each scene's IoU and Acc to an Excel sheet so that mIoU and mAcc can be averaged later
def append_to_excel(file_path, scene, mIoU, mAcc):
    # new row holding this scene's results
    data = {
        'scene': [scene],
        'mIoU': [f"{mIoU:.12f}"],
        'mAcc': [f"{mAcc:.12f}"]
    }
    new_df = pd.DataFrame(data)
    # create the file on first use
    if not os.path.exists(file_path):
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            new_df.to_excel(writer, index=False)
    else:
        # otherwise read the existing sheet and append the new row
        existing_df = pd.read_excel(file_path)
        updated_df = pd.concat([existing_df, new_df], ignore_index=True)
        with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
            updated_df.to_excel(writer, index=False)
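# Example usage (hypothetical values and path), and how the per-scene sheet can later be
# averaged into mIoU / mAcc with pandas:
#   append_to_excel("output/nerf_llff_data(NVOS-all)/iouAndAcc.xlsx", "horns", 0.87, 0.95)
#   df = pd.read_excel("output/nerf_llff_data(NVOS-all)/iouAndAcc.xlsx")
#   print(df[["mIoU", "mAcc"]].astype(float).mean())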
batch_size = 1
schedule = 'cosine'
lr = 0.01
niter = 50  # number of global-alignment iterations; originally 300, reduced to 50 because 300 exhausted memory on this machine
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--img-dir', type=str, default="data/nerf_llff_data(NVOS-all)")
parser.add_argument('--model-path', type=str, default="checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
parser.add_argument('--sam-model', type=str, default="vit_b")
parser.add_argument('--sam-checkpoint', type=str, default="checkpoints/sam_vit_b_01ec64.pth")
parser.add_argument('--outdir', type=str, default="output/eval/horns")
parser.add_argument('--thr', type=float, default=0.6) # 0.6
args = parser.parse_args()
    #### test the cecread scene of the llff(sanerf-hq) dataset
    # run_llff_SANeRF_HQ("data/nerf_llff_data(NVOS-all)/trex")
    for f in os.listdir(args.img_dir):
        scene_path = os.path.join(args.img_dir, f).replace('\\', '/')
        if os.path.isdir(scene_path) and f not in ("horns", "masks", "reference_image", "scribbles"):
            run_llff_SANeRF_HQ(scene_path)