# cannyest/app.py: depth estimation from a webcam capture (Streamlit app)
import torch
import numpy as np
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import streamlit as st
import cv2
# Load model and image processor
image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
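# Note: Streamlit reruns this script top-to-bottom on every interaction, so the
# model is reloaded each time. A minimal sketch of caching the load with
# st.cache_resource (Streamlit's resource cache; the function name is illustrative):
#
# @st.cache_resource
# def load_depth_model():
#     processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
#     model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
#     return processor, model
#
# image_processor, model = load_depth_model()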
# Set the device for model (CUDA if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Use FP16 if available (half precision for speed)
if torch.cuda.is_available():
    model = model.half()
# Streamlit App
st.title("Depth Estimation from Webcam")
# Capture image from webcam
image_data = st.camera_input("Capture an image")
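# st.camera_input returns an UploadedFile (a file-like object) once the user
# takes a photo, and None before that, hence the guard below.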
if image_data is not None:
    # Convert the captured image data to a PIL image; force RGB in case the
    # capture arrives with an alpha channel or another mode
    image = Image.open(image_data).convert("RGB")
    # Prepare the image for the model
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    # Match the model's dtype: the model was cast to FP16 on GPU, so cast the
    # floating-point inputs as well to avoid a dtype-mismatch error
    if torch.cuda.is_available():
        inputs = {k: (v.half() if v.is_floating_point() else v) for k, v in inputs.items()}
    # Model inference (no gradients needed)
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth
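    # predicted_depth has shape (batch, height, width); unsqueeze(1) below adds a
    # channel axis because F.interpolate expects (N, C, H, W) input for bicubic mode.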
    # Interpolate the depth map to match the input image's dimensions
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=(image.height, image.width),  # (H, W) of the original image
        mode="bicubic",
        align_corners=False,
    )
    # Convert the depth map to NumPy for visualization
    depth_map = prediction.squeeze().float().cpu().numpy()
    # Min-max normalize to [0, 255] for display (the epsilon guards against a flat map)
    depth_min, depth_max = depth_map.min(), depth_map.max()
    depth_map_normalized = np.uint8((depth_map - depth_min) / max(depth_max - depth_min, 1e-6) * 255)
    # cv2.applyColorMap returns BGR; convert to RGB so st.image shows the colors correctly
    depth_map_colored = cv2.applyColorMap(depth_map_normalized, cv2.COLORMAP_JET)
    depth_map_colored = cv2.cvtColor(depth_map_colored, cv2.COLOR_BGR2RGB)
    # Display the original image and the depth map in Streamlit
    st.image(image, caption="Captured Image", use_column_width=True)
    st.image(depth_map_colored, caption="Depth Map", use_column_width=True)
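
# Run locally with:
#   streamlit run app.py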