File size: 4,350 Bytes
11a270e 38bf607 11a270e 52f5763 c580a28 52f5763 38bf607 52f5763 38bf607 52f5763 38bf607 52f5763 38bf607 52f5763 38bf607 52f5763 38bf607 c580a28 52f5763 c580a28 52f5763 c580a28 52f5763 c580a28 52f5763 841589e 38bf607 841589e c580a28 52f5763 38bf607 c580a28 52f5763 38bf607 52f5763 c580a28 52f5763 c580a28 52f5763 c580a28 52f5763 c580a28 38bf607 c580a28 52f5763 38bf607 52f5763 841589e 52f5763 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter
import numpy as np
# Model checkpoints are fixed; both pipelines are constructed once, when this
# module is first executed.
SEGMENTATION_CHECKPOINT = "nvidia/segformer-b1-finetuned-cityscapes-1024-1024"
DEPTH_CHECKPOINT = "Intel/zoedepth-nyu-kitti"

segmentation_model = pipeline(
    task="image-segmentation",
    model=SEGMENTATION_CHECKPOINT,
)
depth_estimator = pipeline(
    task="depth-estimation",
    model=DEPTH_CHECKPOINT,
)
def _segmentation_blur(input_image, blur_intensity):
    """Keep the segmented foreground sharp and Gaussian-blur everything else.

    Args:
        input_image: RGB PIL image.
        blur_intensity: radius passed to ``ImageFilter.GaussianBlur``.

    Returns:
        Tuple ``(output_image, mask_image)``: the composited image and the
        binary (0/255) mask that was used for compositing.
    """
    results = segmentation_model(input_image)
    if results:
        # NOTE(review): the last returned segment is assumed to be the main
        # foreground object; confirm this holds for the configured model.
        foreground_mask = results[-1]["mask"].convert("L")
    else:
        # No segments at all: treat the whole frame as background rather than
        # failing with IndexError on results[-1].
        foreground_mask = Image.new("L", input_image.size, 0)

    # Threshold to a hard binary mask so compositing has crisp edges.
    binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)
    blurred_background = input_image.filter(
        ImageFilter.GaussianBlur(radius=blur_intensity)
    )
    # Where the mask is 255 the original is kept; elsewhere the blur shows.
    output_image = Image.composite(input_image, blurred_background, binary_mask)
    return output_image, binary_mask


def _depth_blur(input_image, blur_intensity):
    """Blend the image with a blurred copy, weighted by normalized depth.

    Args:
        input_image: RGB PIL image.
        blur_intensity: radius passed to ``ImageFilter.GaussianBlur``.

    Returns:
        Tuple ``(output_image, mask_image)``: the blended image and the depth
        map rescaled to the 0-255 range (used as the per-pixel blend weight).
    """
    depth_map = depth_estimator(input_image)["depth"]
    if depth_map.size != input_image.size:
        # Defensive: the per-pixel blend below needs matching dimensions.
        depth_map = depth_map.resize(input_image.size)

    depth_array = np.array(depth_map).astype(np.float32)
    depth_min = depth_array.min()
    # The epsilon keeps a perfectly flat depth map from dividing by zero.
    depth_span = depth_array.max() - depth_min + 1e-8
    normalized_depth = ((depth_array - depth_min) / depth_span * 255).astype(np.uint8)
    mask_image = Image.fromarray(normalized_depth)

    blurred_image = input_image.filter(
        ImageFilter.GaussianBlur(radius=blur_intensity)
    )
    orig_np = np.array(input_image).astype(np.float32)
    blur_np = np.array(blurred_image).astype(np.float32)

    # Add a trailing channel axis so the weight broadcasts over RGB.
    alpha = normalized_depth[..., np.newaxis] / 255.0
    # alpha 0 -> original pixel, alpha 1 -> fully blurred pixel.
    blended_np = (1 - alpha) * orig_np + alpha * blur_np
    blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
    return Image.fromarray(blended_np), mask_image


def process_image(input_image, method, blur_intensity):
    """Apply the selected blur effect to an image.

    Two methods are supported:

    1. ``"Segmentation Blur Model"``: extracts a foreground mask via
       segmentation, Gaussian-blurs the background and composites the result.
    2. ``"Monocular Depth Estimation Model"``: estimates a depth map,
       normalizes it and uses it as a blending weight between the original
       and a fully blurred copy.

    Args:
        input_image: PIL image, or ``None`` when no image was supplied.
        method: name of the processing method (see above).
        blur_intensity: Gaussian blur radius.

    Returns:
        Tuple ``(output_image, mask_image)``. ``mask_image`` is binary for the
        segmentation method and the normalized depth for the depth method.
        For an unrecognized ``method`` the RGB input is returned unchanged
        together with its grayscale version. When ``input_image`` is ``None``
        the result is ``(None, None)``.
    """
    if input_image is None:
        # Nothing to process (e.g. the button was pressed before an upload);
        # return empty outputs instead of raising AttributeError.
        return None, None

    # Both models and the blending code expect three-channel RGB input.
    input_image = input_image.convert("RGB")

    if method == "Segmentation Blur Model":
        return _segmentation_blur(input_image, blur_intensity)
    if method == "Monocular Depth Estimation Model":
        return _depth_blur(input_image, blur_intensity)

    # Unknown method: pass the image through untouched.
    return input_image, input_image.convert("L")
# Gradio UI: the first column holds the inputs and the trigger button, the
# second column shows the processed image and the mask that produced it.
method_choices = ["Segmentation Blur Model", "Monocular Depth Estimation Model"]

with gr.Blocks() as demo:
    gr.Markdown(value="## Image Processing App: Segmentation & Depth-based Blur")

    with gr.Row():
        with gr.Column():
            # type="pil" so the handler receives a PIL image.
            input_image = gr.Image(type="pil", label="Input Image")
            method = gr.Radio(
                choices=method_choices,
                value=method_choices[0],
                label="Processing Method",
            )
            blur_intensity = gr.Slider(
                minimum=1,
                maximum=30,
                value=15,
                step=1,
                label="Blur Intensity (sigma)",
            )
            run_button = gr.Button(value="Process Image")

        with gr.Column():
            output_image = gr.Image(label="Output Image")
            mask_output = gr.Image(label="Mask")

    # Wire the button to the processing function.
    run_button.click(
        process_image,
        [input_image, method, blur_intensity],
        [output_image, mask_output],
    )

# Start the app.
demo.launch()