srivatsavdamaraju committed on
Commit c7c40ce · verified · 1 Parent(s): 3bd947f

Update app.txt

Files changed (1):
  1. app.txt +58 -71
app.txt CHANGED
@@ -1,75 +1,62 @@
-import streamlit as st
 import cv2
 import numpy as np
 from PIL import Image
-import base64
-import io
-
-# Function to process the image
-def process_frame(frame):
-    # Convert the frame to a grayscale image (as an example)
-    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-    return gray_frame
-
-# HTML and JavaScript to access the webcam
-html_code = """
-<html>
-<body>
-    <video id="webcam" width="640" height="480" autoplay></video>
-    <canvas id="canvas" width="640" height="480" style="display: none;"></canvas>
-    <button onclick="captureFrame()">Capture Frame</button>
-    <script>
-        const video = document.getElementById("webcam");
-        const canvas = document.getElementById("canvas");
-        const ctx = canvas.getContext("2d");
-
-        // Access the user's webcam
-        navigator.mediaDevices.getUserMedia({ video: true })
-            .then(function(stream) {
-                video.srcObject = stream;
-            })
-            .catch(function(error) {
-                console.error("Error accessing the webcam", error);
-            });
-
-        function captureFrame() {
-            // Draw the current video frame onto the canvas
-            ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
-
-            // Convert the canvas image to a base64-encoded PNG image
-            const imageData = canvas.toDataURL("image/png");
-
-            // Send the image to Streamlit for processing
-            window.parent.postMessage(imageData, "*");
-        }
-    </script>
-</body>
-</html>
-"""
-
-# Create a Streamlit component for the webcam
-st.components.v1.html(html_code, height=480)
-
-# This will hold the base64 encoded image
-if 'image_data' not in st.session_state:
-    st.session_state['image_data'] = None
-
-# Capture the image data from JavaScript to Streamlit
-def receive_image(image_data):
-    if image_data is not None:
-        # Decode the base64 image
-        img_str = image_data.split(",")[1]
-        img_bytes = base64.b64decode(img_str)
-        img_array = np.frombuffer(img_bytes, np.uint8)
-        frame = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
-
-        # Process the frame
-        processed_frame = process_frame(frame)
-
-        # Convert the processed frame to PIL image to display
-        processed_pil_image = Image.fromarray(processed_frame)
-        st.image(processed_pil_image, caption="Processed Frame", use_column_width=True)
 
-# Listen for the image data from JavaScript
-st.session_state['image_data'] = st.experimental_get_query_params().get("image_data", [None])[0]
-receive_image(st.session_state['image_data'])
 import cv2
+import torch
 import numpy as np
 from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+import streamlit as st
+
+# Load the model and image processor
+image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
+model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
+
+# Set the device for the model (CUDA if available)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+# Use FP16 on GPU (half precision for speed)
+if torch.cuda.is_available():
+    model = model.half()
+
+# Streamlit app
+st.title("Real-time Depth Estimation from Webcam")
+
+# Initialize the webcam capture (OpenCV)
+cap = cv2.VideoCapture(0)
+
+# Streamlit button to capture a screenshot
+if st.button("Capture Screenshot"):
+    ret, frame = cap.read()
+    if ret:
+        # Convert the BGR frame to RGB for the model
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame_rgb)
+
+        # Prepare the image for the model; the inputs must match the
+        # model's dtype, so cast them to FP16 when the model was halved above
+        inputs = image_processor(images=image, return_tensors="pt").to(device)
+        if torch.cuda.is_available():
+            inputs = {k: v.half() for k, v in inputs.items()}
+
+        # Model inference (no gradients needed)
+        with torch.no_grad():
+            outputs = model(**inputs)
+            predicted_depth = outputs.predicted_depth
+
+        # Interpolate the depth map to match the frame's dimensions
+        prediction = torch.nn.functional.interpolate(
+            predicted_depth.unsqueeze(1),
+            size=(frame.shape[0], frame.shape[1]),  # match the frame's dimensions
+            mode="bicubic",
+            align_corners=False,
+        )
+
+        # Convert the depth map to float32 NumPy for visualization
+        depth_map = prediction.squeeze().float().cpu().numpy()
+
+        # Normalize the depth map to 0-255 for display
+        depth_map_normalized = np.uint8(depth_map / np.max(depth_map) * 255)
+        depth_map_colored = cv2.applyColorMap(depth_map_normalized, cv2.COLORMAP_JET)
+
+        # Display the original frame and the depth map in Streamlit
+        st.image(frame, caption="Original Webcam Image", channels="BGR", use_column_width=True)
+        st.image(depth_map_colored, caption="Depth Map", channels="BGR", use_column_width=True)
+
+# Release the capture object when done
+cap.release()
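
As a quick sanity check of the depth model outside Streamlit, the same checkpoint can be exercised through the transformers depth-estimation pipeline (a minimal sketch; the input path test.jpg is a placeholder):

    from transformers import pipeline
    from PIL import Image

    # Load the same Depth-Anything-V2 checkpoint via the high-level pipeline API
    pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf")

    # Run inference on a local image; the result includes a ready-to-save PIL depth map
    result = pipe(Image.open("test.jpg"))
    result["depth"].save("depth.png")

To run the app itself, the script would be saved with a .py extension (e.g. app.py) and started with streamlit run app.py, since Streamlit does not execute .txt files.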