srivatsavdamaraju committed: Update app.txt

app.txt CHANGED
@@ -1,75 +1,62 @@
-import streamlit as st
 import cv2
 import numpy as np
 from PIL import Image
-import
-import
-
-# Function to process the image
-def process_frame(frame):
-    # Convert the frame to a grayscale image (as an example)
-    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-    return gray_frame
-
-# HTML and JavaScript to access the webcam
-html_code = """
-<html>
-<body>
-    <video id="webcam" width="640" height="480" autoplay></video>
-    <canvas id="canvas" width="640" height="480" style="display: none;"></canvas>
-    <button onclick="captureFrame()">Capture Frame</button>
-    <script>
-        const video = document.getElementById("webcam");
-        const canvas = document.getElementById("canvas");
-        const ctx = canvas.getContext("2d");
-
-        // Access the user's webcam
-        navigator.mediaDevices.getUserMedia({ video: true })
-            .then(function(stream) {
-                video.srcObject = stream;
-            })
-            .catch(function(error) {
-                console.error("Error accessing the webcam", error);
-            });
-
-        function captureFrame() {
-            // Draw the current video frame onto the canvas
-            ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
-
-            // Convert the canvas image to a base64-encoded PNG image
-            const imageData = canvas.toDataURL("image/png");
-
-            // Send the image to Streamlit for processing
-            window.parent.postMessage(imageData, "*");
-        }
-    </script>
-</body>
-</html>
-"""
-
-# Create a Streamlit component for the webcam
-st.components.v1.html(html_code, height=480)
-
-# This will hold the base64 encoded image
-if 'image_data' not in st.session_state:
-    st.session_state['image_data'] = None
-
-# Capture the image data from JavaScript to Streamlit
-def receive_image(image_data):
-    if image_data is not None:
-        # Decode the base64 image
-        img_str = image_data.split(",")[1]
-        img_bytes = base64.b64decode(img_str)
-        img_array = np.frombuffer(img_bytes, np.uint8)
-        frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
-
-        # Process the frame
-        processed_frame = process_frame(frame)
-
-        # Convert the processed frame to PIL image to display
-        processed_pil_image = Image.fromarray(processed_frame)
-        st.image(processed_pil_image, caption="Processed Frame", use_column_width=True)

-#
-
-
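(Context for the change: the removed version never completes its round trip. The JavaScript posts each captured frame with window.parent.postMessage, but a static st.components.v1.html block has no channel back into Python, so receive_image is never called; the two bare import lines, presumably base64 and one more, were also left unfinished. The version below drops the browser bridge and captures frames server-side with OpenCV instead.)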
 import cv2
+import torch
 import numpy as np
 from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+import streamlit as st

+# Load model and image processor
+image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
+model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
+
+# Set the device for the model (CUDA if available)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# Use FP16 if available (half precision for speed)
+if torch.cuda.is_available():
+    model = model.half()
+
+# Streamlit App
+st.title("Real-time Depth Estimation from Webcam")
+
+# Initialize the webcam capture (OpenCV)
+cap = cv2.VideoCapture(0)
+
+# Streamlit button to capture a screenshot
+if st.button("Capture Screenshot"):
+    ret, frame = cap.read()
+    if ret:
+        # Process the frame for depth estimation
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame_rgb)
+
+        # Prepare image for the model
+        inputs = image_processor(images=image, return_tensors="pt").to(device)
+        # Fix: when the model was halved above, cast the inputs to FP16 too,
+        # otherwise FP32 pixel values meet FP16 weights and raise a dtype error
+        if torch.cuda.is_available():
+            inputs = {k: v.half() for k, v in inputs.items()}
+
+        # Model inference (no gradients needed)
+        with torch.no_grad():
+            outputs = model(**inputs)
+            predicted_depth = outputs.predicted_depth
+
+        # Interpolate depth map to match the frame's dimensions
+        prediction = torch.nn.functional.interpolate(
+            predicted_depth.unsqueeze(1),
+            size=(frame.shape[0], frame.shape[1]),  # Match the frame's dimensions
+            mode="bicubic",
+            align_corners=False,
+        )
+
+        # Convert depth map to numpy for visualization
+        depth_map = prediction.squeeze().cpu().numpy()
+
+        # Normalize depth map for display (visualization purposes)
+        depth_map_normalized = np.uint8(depth_map / np.max(depth_map) * 255)
+        depth_map_colored = cv2.applyColorMap(depth_map_normalized, cv2.COLORMAP_JET)
+
+        # Display the original frame and the depth map in Streamlit
+        st.image(frame, caption="Original Webcam Image", channels="BGR", use_column_width=True)
+        st.image(depth_map_colored, caption="Depth Map", channels="BGR", use_column_width=True)
+
+# Release the capture object when done
+cap.release()
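To run the new version locally, something along these lines should work (a sketch, not part of the commit: the file name app.py is an assumption, since the Space stores the code as app.txt, and the package list is inferred from the imports rather than from a pinned requirements file):

    pip install streamlit opencv-python torch transformers pillow
    streamlit run app.py

One caveat on the design: cv2.VideoCapture(0) opens a camera on the machine running the Streamlit server, so on a hosted Space it will not see the viewer's webcam; the browser-based capture this commit removes was an attempt at exactly that viewer-side capture.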