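# Source: cannyest / app.txt
# (Hugging Face file-viewer residue: srivatsavdamaraju's picture)
# Commit message: Update app.txt
# Commit: c7c40ce (verified)
# Viewer links: raw | history | blame
# File size: 2.3 kB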
import cv2
import torch
import numpy as np
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import streamlit as st
# Hugging Face checkpoint used for both the image processor and the model.
_DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"


@st.cache_resource
def _load_depth_model():
    """Load the depth-estimation processor and model once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    loading the weights at module level would repeat the expensive
    ``from_pretrained`` calls on each button click. ``st.cache_resource``
    keeps a single shared instance alive across reruns.

    Returns:
        A ``(image_processor, model, device)`` tuple. The model is already
        moved to ``device`` and, when CUDA is available, cast to FP16.
    """
    processor = AutoImageProcessor.from_pretrained(_DEPTH_MODEL_ID)
    depth_model = AutoModelForDepthEstimation.from_pretrained(_DEPTH_MODEL_ID)

    # Set the device for the model (CUDA if available).
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    depth_model.to(target_device)

    # Use FP16 on GPU (half precision for speed); CPU stays in FP32.
    if target_device.type == "cuda":
        depth_model = depth_model.half()

    return processor, depth_model, target_device


# Module-level names kept so the rest of the script can use them unchanged.
image_processor, model, device = _load_depth_model()
def _estimate_depth(frame_bgr):
    """Run the depth model on one BGR frame and return a depth map.

    Args:
        frame_bgr: Image array as returned by ``cv2.VideoCapture.read`` (BGR
            channel order); its first two dimensions are used as the output
            height and width.

    Returns:
        A float32 NumPy array with the same height and width as ``frame_bgr``.
    """
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame_rgb)

    # Prepare the image for the model.
    inputs = image_processor(images=image, return_tensors="pt").to(device)

    # The model may have been cast to FP16 (on CUDA) while the processor
    # emits FP32 tensors; align floating-point inputs with the model's dtype
    # so the forward pass does not fail with a dtype mismatch.
    model_dtype = next(model.parameters()).dtype
    model_inputs = {
        key: value.to(dtype=model_dtype) if torch.is_floating_point(value) else value
        for key, value in inputs.items()
    }

    # Model inference (no gradients needed).
    with torch.no_grad():
        outputs = model(**model_inputs)

    # Resize the prediction to the frame's dimensions. Upcast to FP32 first
    # so interpolation and the NumPy conversion do not run in half precision.
    prediction = torch.nn.functional.interpolate(
        outputs.predicted_depth.unsqueeze(1).float(),
        size=(frame_bgr.shape[0], frame_bgr.shape[1]),
        mode="bicubic",
        align_corners=False,
    )
    return prediction.squeeze().cpu().numpy()


def _colorize_depth(depth_map):
    """Scale a depth map to 0-255 and apply the JET colour map.

    Bicubic resizing can overshoot below zero, and a negative value would
    wrap around when cast to ``uint8``; values are therefore clipped at zero
    before scaling. A map whose maximum is not positive is rendered as
    all-zero instead of dividing by zero.

    Args:
        depth_map: 2-D NumPy array of depth values.

    Returns:
        A BGR ``uint8`` image produced by ``cv2.applyColorMap``.
    """
    clipped = np.clip(depth_map.astype(np.float32), 0, None)
    peak = float(clipped.max()) if clipped.size else 0.0
    if peak > 0:
        normalized = (clipped / peak * 255).astype(np.uint8)
    else:
        normalized = np.zeros(clipped.shape, dtype=np.uint8)
    return cv2.applyColorMap(normalized, cv2.COLORMAP_JET)


# Streamlit App
st.title("Real-time Depth Estimation from Webcam")

# Streamlit button to capture a screenshot
if st.button("Capture Screenshot"):
    # Open the webcam only when a capture is requested, and always release
    # it afterwards so the device is not held open across script reruns.
    cap = cv2.VideoCapture(0)
    try:
        ret, frame = cap.read()
    finally:
        cap.release()

    if not ret:
        st.error("Could not read a frame from the webcam.")
    else:
        depth_map = _estimate_depth(frame)
        depth_map_colored = _colorize_depth(depth_map)

        # Display the original frame and the depth map in Streamlit
        st.image(frame, caption="Original Webcam Image", channels="BGR", use_column_width=True)
        st.image(depth_map_colored, caption="Depth Map", channels="BGR", use_column_width=True)