# Importing the requirements
from typing import Optional

import numpy as np
from PIL import Image
import gradio as gr
import torch
from transformers import DPTImageProcessor, DPTForDepthEstimation

# Load the model and feature extractor
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-beit-large-512")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-beit-large-512")


# Function to process an image and return the formatted depth map as an image
def process_image(image: Optional[Image.Image]) -> Optional[Image.Image]:
    """
    Preprocesses an image, passes it through a model, and returns the
    formatted depth map as an image.

    Args:
        image (PIL.Image.Image): The input image. May be ``None`` when the
            caller supplied no image.

    Returns:
        PIL.Image.Image: The depth map rescaled to 0-255 as an 8-bit
        grayscale image with the same width and height as the input, or
        ``None`` (after emitting a Gradio warning) when no image was given.
    """
    # Check if the image is provided. Use an identity check rather than
    # truthiness so array-like inputs do not raise on ambiguous bool(), and
    # stop here: continuing with ``None`` would crash in the preprocessor.
    if image is None:
        gr.Warning("No image provided")
        return None

    # Preprocess the image for the model
    encoding = feature_extractor(image, return_tensors="pt")

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Interpolate the predicted depth map to the original image size.
    # PIL reports size as (width, height); interpolate expects (height, width).
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    # Drop only the batch and channel axes. A bare ``squeeze()`` would also
    # remove a spatial axis of length 1 (1-pixel-wide or 1-pixel-tall input).
    output = prediction[0, 0].cpu().numpy()

    # Scale so the largest depth value maps to 255. Guard against a
    # non-positive (or NaN) peak, which would otherwise divide by zero.
    peak = float(np.max(output))
    if peak > 0:
        scaled = output * 255 / peak
        # Bicubic interpolation can undershoot below zero; clip so the
        # uint8 cast cannot wrap around.
        formatted = np.clip(scaled, 0, 255).astype("uint8")
    else:
        formatted = np.zeros(output.shape, dtype="uint8")

    # Return the formatted depth map as an image
    return Image.fromarray(formatted)