Spaces:

vemodalen
/

DepthanythingForSD

Sleeping

App Files Files Community

DepthanythingForSD / app.py

vemodalen

Upload 106 files

4eef87d verified 12 months ago

raw

history blame contribute delete

3.24 kB

	import gradio as gr
	import cv2
	import numpy as np
	import os
	from PIL import Image
	import torch
	import torch.nn.functional as F
	from torchvision.transforms import Compose
	import tempfile
	from gradio_imageslider import ImageSlider

	from depth_anything.dpt import DepthAnything
	from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

	# Stylesheet passed to gr.Blocks; it caps the height of the image widgets
	# identified by the elem_id values used in the UI below.
	css = """
	#img-display-container {
	max-height: 100vh;
	}
	#img-display-input {
	max-height: 80vh;
	}
	#img-display-output {
	max-height: 80vh;
	}
	"""

	# Markdown heading rendered at the top of the page.
	title = "# Depth Anything with log"

	# Inference is pinned to the CPU.
	DEVICE = "cpu"
	# Encoder tag interpolated into the checkpoint id below; the original note
	# lists 'vitb' and 'vitl' as accepted values.
	encoder = "vitl"

	# Load the pretrained weights, move them to DEVICE and switch to eval mode.
	model = DepthAnything.from_pretrained(f"LiheYoung/depth_anything_{encoder}14")
	model = model.to(DEVICE).eval()

	# Pre-processing applied to every input image before it reaches the model.
	# NOTE(review): the exact resize semantics live in
	# depth_anything.util.transform; presumably the image is scaled so both
	# sides are at least 518 px and multiples of 14 - confirm there.
	transform = Compose(
	    [
	        Resize(
	            width=518, height=518,
	            resize_target=False,
	            keep_aspect_ratio=True,
	            ensure_multiple_of=14,
	            resize_method="lower_bound",
	            image_interpolation_method=cv2.INTER_CUBIC,
	        ),
	        # Per-channel mean/std (the commonly used ImageNet statistics).
	        NormalizeImage(
	            mean=[0.485, 0.456, 0.406],
	            std=[0.229, 0.224, 0.225],
	        ),
	        PrepareForNet(),
	    ]
	)

	def predict_depth(model, image):
	    """Run ``model`` on ``image`` with autograd disabled and return its output.

	    Args:
	        model: Any callable (typically a ``torch.nn.Module``) taking ``image``.
	        image: The input handed to ``model`` unchanged.

	    Returns:
	        Whatever ``model(image)`` returns; no gradient graph is recorded.
	    """
	    with torch.no_grad():
	        prediction = model(image)
	    return prediction


	def _log_depth_preview(depth, lower_q=0.05, upper_q=0.95, epsilon=1e-7):
	    """Turn a 2-D depth tensor into an 8-bit, log-scaled preview image.

	    Args:
	        depth: 2-D floating-point torch tensor (height, width).
	        lower_q: Lower quantile used to clip extreme values.
	        upper_q: Upper quantile used to clip extreme values.
	        epsilon: Small positive offset that keeps ``log`` away from zero.

	    Returns:
	        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as ``depth``.
	        An all-zero array is returned when the clipped depth has no dynamic
	        range, instead of dividing by zero.
	    """
	    flat = depth.reshape(1, -1)

	    # Compute the quantile thresholds and clip to them so a handful of
	    # extreme pixels cannot dominate the normalisation range.
	    low = torch.quantile(flat, lower_q, dim=1, keepdim=True)
	    high = torch.quantile(flat, upper_q, dim=1, keepdim=True)
	    clipped = torch.clamp(flat, low, high).view_as(depth)

	    # epsilon avoids computing log(0).
	    log_depth = torch.log(clipped + epsilon)
	    log_min = log_depth.min()
	    span = log_depth.max() - log_min
	    if not bool(span > 0):
	        # Flat (or NaN) depth map: min-max scaling would divide by zero and
	        # push NaNs through the uint8 cast, so return a blank preview.
	        return np.zeros(tuple(depth.shape), dtype=np.uint8)

	    preview = (log_depth - log_min) / span * 255.
	    return preview.cpu().numpy().astype(np.uint8)


	# NOTE(review): the scraped source lost its indentation; which widgets sit
	# inside gr.Row() below is a best guess - confirm against the real layout.
	with gr.Blocks(css=css) as demo:
	    gr.Markdown(title)

	    with gr.Row():
	        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
	        depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5,)
	    raw_file = gr.File(label="16-bit raw depth (can be considered as disparity)")
	    submit = gr.Button("Submit")

	    def on_submit(image):
	        """Predict depth for ``image`` and build both outputs of the demo.

	        Args:
	            image: The array delivered by the ``gr.Image`` input
	                (``type='numpy'``), or ``None`` when nothing was uploaded.

	        Returns:
	            ``[(original_image, preview), raw_path]`` where ``preview`` is the
	            8-bit log-scaled depth map shown in the slider and ``raw_path``
	            is the path of a PNG holding the depth cast to ``uint16``.

	        Raises:
	            gr.Error: If no input image was provided.
	        """
	        if image is None:
	            raise gr.Error("Please provide an input image.")

	        original_image = image.copy()
	        h, w = image.shape[:2]

	        # Gradio already hands over RGB data, so no BGR->RGB swap is applied
	        # here; swapping would feed BGR to a model normalised with RGB
	        # statistics. Only rescale to [0, 1].
	        image = image / 255.0
	        image = transform({'image': image})['image']
	        image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

	        depth = predict_depth(model, image)
	        # Resize the prediction back to the input resolution.
	        depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

	        raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint16'))
	        # Reserve a persistent temp path, then close the handle before PIL
	        # writes to it so no file descriptor is leaked and the save does not
	        # target a file that is still open.
	        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
	            raw_path = tmp.name
	        raw_depth.save(raw_path)

	        preview = _log_depth_preview(depth)

	        return [(original_image, preview), raw_path]

	    submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, raw_file])

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    queued_demo = demo.queue()  # enable request queuing before serving
	    queued_demo.launch()