In [1]:
import os
import time

import cv2
import torch
from tqdm import tqdm

from depth_anything_v2.dpt import DepthAnythingV2

def get_files(PATH):
    """Recursively collect the paths of all files below one or more directories.

    Args:
        PATH: A single directory given as ``str`` or ``os.PathLike``, or a
            list/tuple/set of such directories.

    Returns:
        A list of file paths (``os.path.join(dirpath, filename)``) in
        ``os.walk`` order, with the roots visited in the order given.
        Any other kind of argument (e.g. ``None``) yields an empty list.
    """
    if isinstance(PATH, (str, os.PathLike)):
        # Wrap a lone path so both call styles share a single walk loop.
        roots = [PATH]
    elif isinstance(PATH, (list, tuple, set, frozenset)):
        roots = PATH
    else:
        # Unsupported input: keep the historical "nothing found" result.
        return []

    file_lan = []
    for root in roots:
        for dirpath, _dirnames, filenames in os.walk(root):
            file_lan.extend(os.path.join(dirpath, name) for name in filenames)
    return file_lan

# Prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
elif torch.backends.mps.is_available():
    DEVICE = 'mps'
else:
    DEVICE = 'cpu'
# DEVICE = 'cpu'  # uncomment to force CPU execution

# Keyword arguments passed to DepthAnythingV2 for each encoder variant.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}

# Must be one of the keys of model_configs: 'vits', 'vitb', 'vitl' or 'vitg'.
encoder = 'vits'

# The checkpoint file name is derived from the chosen encoder.
checkpoint_path = f'/home/Depth-Anything-V2-main/checkpoints/depth_anything_v2_{encoder}.pth'

model = DepthAnythingV2(**model_configs[encoder])
# Weights are first mapped to the CPU and only afterwards moved to DEVICE.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
model = model.to(DEVICE)
model = model.eval()
# Image folders to process. For every folder the depth maps are written to the
# path obtained by replacing 'images' with 'depth_small'.
img_list = [
    # r'/home/DATA/HRSOD_test/images',
    r'/home/DATA/UHRSD_TE_2K/images',
    # r'DATA/DIS-DATA/DIS-VD/images',
    # r'DATA/DIS-DATA/DIS-TE1/images',
    # r'DATA/DIS-DATA/DIS-TE2/images',
    # r'DATA/DIS-DATA/DIS-TE3/images',
    # r'DATA/DIS-DATA/DIS-TE4/images',
]

# CUDA events can only be recorded when the model runs on a CUDA device; on
# 'mps' / 'cpu' a wall-clock timer is used instead.
use_cuda_timer = DEVICE == 'cuda'
if use_cuda_timer:
    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
else:
    starter = ender = None

with torch.no_grad():
    for i in img_list:
        file_lans = get_files(i)
        depth_path = i.replace('images', 'depth_small')
        if depth_path == i:
            # Without this guard the depth maps would overwrite the input images.
            raise ValueError(f"cannot derive an output folder: 'images' not found in {i!r}")
        os.makedirs(depth_path, exist_ok=True)

        timings_ms = []  # one entry per successfully processed image
        for files in tqdm(file_lans):
            img = cv2.imread(files)
            if img is None:
                # get_files returns every file it finds; skip anything OpenCV
                # cannot decode instead of crashing inside the model.
                continue

            if use_cuda_timer:
                starter.record()
                depth = model.infer_image(img)
                ender.record()
                # elapsed_time is only valid once both events have completed.
                torch.cuda.synchronize()
                curr_time = starter.elapsed_time(ender)
            else:
                start = time.perf_counter()
                depth = model.infer_image(img)
                if DEVICE == 'mps' and hasattr(torch, 'mps'):
                    # Wait for queued MPS work before reading the clock.
                    torch.mps.synchronize()
                curr_time = (time.perf_counter() - start) * 1000.0
            timings_ms.append(curr_time)

            # Min-max normalise to [0, 255]; a constant map has zero range and
            # is written as all zeros rather than dividing by zero.
            # NOTE(review): assumes infer_image returns a NumPy float array
            # (it is handed straight to cv2.imwrite) -- confirm.
            depth_min = depth.min()
            depth_range = depth.max() - depth_min
            depth = depth - depth_min
            if depth_range > 0:
                depth = depth / depth_range * 255.0
            # Convert explicitly to 8-bit instead of relying on imwrite to
            # coerce a float array.
            depth = depth.round().astype('uint8')
            # Output keeps the input file name; sub-folder structure is flattened.
            cv2.imwrite(os.path.join(depth_path, os.path.basename(files)), depth)
            # torch.cuda.empty_cache()

        if not timings_ms:
            print(f"no readable images found in {i}")
            continue
        all_time = torch.tensor(timings_ms)
        mean_time = all_time.mean()
        print(f"inference time:{mean_time}ms/iter, FPS:{1000/mean_time}")

100%|██████████| 988/988 [01:11<00:00, 13.80it/s]

inference time:47.75608444213867ms/iter, FPS:20.939739227294922



