{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 988/988 [01:11<00:00, 13.80it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inference time:47.75608444213867ms/iter, FPS:20.939739227294922\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import time\n",
    "\n",
    "import cv2\n",
    "import torch\n",
    "from depth_anything_v2.dpt import DepthAnythingV2\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "def get_files(PATH):\n",
    "    \"\"\"Recursively collect the paths of all files under one or more directories.\n",
    "\n",
    "    Args:\n",
    "        PATH: A directory path (str) or a list/tuple of directory paths.\n",
    "\n",
    "    Returns:\n",
    "        A list of file paths (walked directory joined with file name), in\n",
    "        os.walk order. Any other input type yields an empty list.\n",
    "    \"\"\"\n",
    "    if isinstance(PATH, str):\n",
    "        roots = [PATH]\n",
    "    elif isinstance(PATH, (list, tuple)):\n",
    "        roots = PATH\n",
    "    else:\n",
    "        roots = []\n",
    "    return [\n",
    "        os.path.join(dirpath, filename)\n",
    "        for root in roots\n",
    "        for dirpath, _dirnames, filenames in os.walk(root)\n",
    "        for filename in filenames\n",
    "    ]\n",
    "\n",
    "\n",
    "# Prefer CUDA, then Apple MPS, then CPU.\n",
    "DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'\n",
    "# DEVICE = 'cpu'  # uncomment to force CPU\n",
    "\n",
    "# Constructor keyword arguments for DepthAnythingV2, keyed by encoder name.\n",
    "model_configs = {\n",
    "    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},\n",
    "    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},\n",
    "    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},\n",
    "    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}\n",
    "}\n",
    "\n",
    "encoder = 'vits'  # one of 'vits', 'vitb', 'vitl', 'vitg'\n",
    "\n",
    "model = DepthAnythingV2(**model_configs[encoder])\n",
    "# NOTE(review): torch.load unpickles the checkpoint; only load files from a trusted source.\n",
    "model.load_state_dict(torch.load(f'/home/Depth-Anything-V2-main/checkpoints/depth_anything_v2_{encoder}.pth', map_location='cpu'))\n",
    "model = model.to(DEVICE).eval()\n",
    "\n",
    "# Image directories to process; uncomment entries to include them.\n",
    "img_list = [\n",
    "    # r'/home/DATA/HRSOD_test/images',\n",
    "    r'/home/DATA/UHRSD_TE_2K/images',\n",
    "    # r'DATA/DIS-DATA/DIS-VD/images',\n",
    "    # r'DATA/DIS-DATA/DIS-TE1/images',\n",
    "    # r'DATA/DIS-DATA/DIS-TE2/images',\n",
    "    # r'DATA/DIS-DATA/DIS-TE3/images',\n",
    "    # r'DATA/DIS-DATA/DIS-TE4/images',\n",
    "]\n",
    "\n",
    "# CUDA events only exist on CUDA builds/devices; fall back to wall-clock timing elsewhere.\n",
    "use_cuda_timing = DEVICE == 'cuda'\n",
    "if use_cuda_timing:\n",
    "    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)\n",
    "\n",
    "with torch.no_grad():\n",
    "    for i in img_list:\n",
    "        file_lans = get_files(i)\n",
    "        # Outputs go to a sibling path: 'images' in the input path is swapped for 'depth_small'.\n",
    "        depth_path = i.replace('images', 'depth_small')\n",
    "        os.makedirs(depth_path, exist_ok=True)\n",
    "        # Per-image inference times in milliseconds (only for images actually processed).\n",
    "        all_time = []\n",
    "        for files in tqdm(file_lans):\n",
    "            img = cv2.imread(files)\n",
    "            if img is None:\n",
    "                # cv2.imread returns None for files it cannot decode (e.g. non-image files).\n",
    "                continue\n",
    "\n",
    "            if use_cuda_timing:\n",
    "                starter.record()\n",
    "                depth = model.infer_image(img)\n",
    "                ender.record()\n",
    "                # Wait for the recorded events to complete before reading the elapsed time.\n",
    "                torch.cuda.synchronize()\n",
    "                curr_time = starter.elapsed_time(ender)\n",
    "            else:\n",
    "                # NOTE(review): assumes infer_image returns a host-side array, so the\n",
    "                # wall-clock interval already covers the device work - confirm.\n",
    "                t0 = time.perf_counter()\n",
    "                depth = model.infer_image(img)\n",
    "                curr_time = (time.perf_counter() - t0) * 1000.0\n",
    "            all_time.append(curr_time)\n",
    "\n",
    "            # Min-max normalise to [0, 255]; a constant map stays all-zero instead of NaN.\n",
    "            depth = depth - depth.min()\n",
    "            peak = depth.max()\n",
    "            if peak > 0:\n",
    "                depth = depth / peak * 255.0\n",
    "            cv2.imwrite(os.path.join(depth_path, os.path.basename(files)), depth)\n",
    "            # torch.cuda.empty_cache()\n",
    "        if all_time:\n",
    "            mean_time = sum(all_time) / len(all_time)\n",
    "            print(f\"inference time:{mean_time}ms/iter, FPS:{1000/mean_time}\")\n",
    "        else:\n",
    "            print(f\"no readable images found in {i}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "wsltorch",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}