File size: 2,603 Bytes
53f5f46
 
 
 
 
 
 
 
 
 
 
 
a64f32c
53f5f46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from flask import Flask, request, render_template, jsonify
import cv2
import numpy as np
import torch
from torchvision import transforms
import base64
from io import BytesIO
from PIL import Image
import threading
import queue

# Load the MiDaS "small" depth-estimation model from PyTorch Hub.
# force_reload is left at its default (False): forcing it re-downloads the
# hub repository on every start-up (and again on every debug-reloader
# restart), which is slow and makes the app unusable offline. The cached
# copy from the first run is reused instead.
model = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
# Inference only: put layers such as batch-norm/dropout into eval behaviour.
model.eval()

# Preprocessing pipeline applied to every frame before it reaches the model.
transform = transforms.Compose(
    [
        # ndarray -> PIL image, so the PIL-based resize below can run.
        transforms.ToPILImage(),
        # Fixed 256x256 network input, regardless of the source frame size.
        transforms.Resize(size=(256, 256)),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel standardisation with the usual ImageNet statistics.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Create the Flask application object; the @app.route decorators further
# down register their view functions on it.
app = Flask(__name__)

# Function to estimate depth from a frame and apply color mapping
def estimate_depth(frame):
    """Predict depth for one frame and render it as a JET-coloured image.

    Args:
        frame: Image array accepted by the module-level ``transform``
            (its first step is ``ToPILImage``).

    Returns:
        The ``uint8`` colour image produced by ``cv2.applyColorMap`` from
        the min-max normalised model prediction.
    """
    tensor = transform(frame)
    # The model takes a batch, so add a leading batch dimension of 1.
    batch = tensor.unsqueeze(0)

    with torch.no_grad():
        raw_depth = model(batch).squeeze().cpu().numpy()

        # Stretch the prediction to the full 0..255 range so the colormap
        # uses its whole palette, then quantise to 8 bits.
        scaled = cv2.normalize(raw_depth, None, 0, 255, cv2.NORM_MINMAX)
        as_bytes = scaled.astype(np.uint8)
        return cv2.applyColorMap(as_bytes, cv2.COLORMAP_JET)

# Function to process the video frame in a separate thread
def process_frame_thread(data, response_queue):
    """Decode a base64 frame, estimate its depth map, and queue the result.

    Args:
        data: The frame as a string, either a full data URL
            (``data:image/...;base64,<payload>``) or the bare base64
            payload.
        response_queue: Queue that receives exactly one item on success:
            a ``data:image/jpeg;base64,...`` string holding the coloured
            depth map.

    Raises:
        ValueError: If the depth map cannot be JPEG-encoded.

    Errors from base64 decoding or from opening the image propagate
    unchanged; in that case nothing is put on ``response_queue``.
    """
    # Accept both a data URL and a bare payload. Indexing split(',')[1]
    # raised IndexError when no "data:...," prefix was present.
    _, separator, payload = data.partition(',')
    if not separator:
        payload = data
    image_data = base64.b64decode(payload)

    # Uploaded frames may carry an alpha channel (PNG) or be grayscale;
    # force three channels so the 3-channel normalisation downstream works.
    image = Image.open(BytesIO(image_data)).convert('RGB')

    # Keep RGB channel order: the frame goes straight into the module-level
    # `transform`, whose mean/std are the RGB ImageNet statistics. Swapping
    # to BGR here would feed the model channel-swapped input.
    frame = np.array(image)
    depth_map = estimate_depth(frame)

    # Encode depth map as a base64 image to send back
    encoded_ok, buffer = cv2.imencode('.jpg', depth_map)
    if not encoded_ok:
        raise ValueError("cv2.imencode failed to encode the depth map")
    depth_map_base64 = base64.b64encode(buffer).decode('utf-8')

    # Add the result to the response queue
    response_queue.put(f"data:image/jpeg;base64,{depth_map_base64}")

# Route to serve the HTML template
@app.route('/')
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page

# Route to process video frames and return depth map
@app.route('/process_frame', methods=['POST'])
def process_frame():
    """Handle a POSTed frame and respond with its depth map.

    Expects a JSON body of the form ``{"image": "<base64 or data URL>"}``.

    Returns:
        ``{"depth_map": "<data URL>"}`` on success.
        ``{"error": ...}`` with status 400 when the body is not a JSON
        object with a string ``image`` field.
        ``{"error": ...}`` with status 500 when the worker produced no
        result.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so every bad-input case is answered the same way below.
    body = request.get_json(silent=True)
    data = body.get('image') if isinstance(body, dict) else None
    if not isinstance(data, str):
        return jsonify({'error': "JSON body must contain a string 'image' field"}), 400

    # Create a queue to hold the response from the background thread
    response_queue = queue.Queue()

    # Start the processing thread
    thread = threading.Thread(target=process_frame_thread, args=(data, response_queue))
    thread.start()

    # join() returns whether the worker finished normally or died with an
    # exception; in the latter case the queue stays empty.
    thread.join()
    try:
        # Non-blocking read: a blocking get() on an empty queue would hang
        # this request forever after a worker failure.
        depth_map_base64 = response_queue.get_nowait()
    except queue.Empty:
        return jsonify({'error': 'Failed to process frame'}), 500

    return jsonify({'depth_map': depth_map_base64})

# Start Flask's built-in server only when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode (auto-reloader and
# interactive debugger) — confirm this entry point is used for local
# development only.
if __name__ == "__main__":
    app.run(debug=True)