File size: 4,350 Bytes
11a270e
 
38bf607
11a270e
52f5763
c580a28
 
 
52f5763
38bf607
52f5763
 
 
38bf607
52f5763
38bf607
52f5763
 
38bf607
52f5763
 
38bf607
52f5763
 
 
 
 
 
 
 
38bf607
c580a28
52f5763
c580a28
52f5763
c580a28
52f5763
c580a28
52f5763
841589e
38bf607
 
841589e
c580a28
52f5763
 
 
38bf607
c580a28
52f5763
 
 
 
 
 
 
 
 
38bf607
 
52f5763
c580a28
52f5763
 
c580a28
52f5763
 
c580a28
52f5763
 
 
 
 
 
 
 
 
 
 
 
 
 
c580a28
 
 
 
38bf607
 
 
c580a28
 
 
 
 
 
 
52f5763
 
38bf607
52f5763
 
 
841589e
52f5763
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter
import numpy as np

# Initialize models with fixed choices.
# Both pipelines are constructed once at import time so every request
# reuses the same loaded weights. SegFormer (Cityscapes-finetuned) does
# semantic segmentation; ZoeDepth does monocular depth estimation.
segmentation_model = pipeline("image-segmentation", model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
depth_estimator = pipeline("depth-estimation", model="Intel/zoedepth-nyu-kitti")

def process_image(input_image, method, blur_intensity):
    """
    Apply a background-blur effect to *input_image* using one of two methods.

    Parameters
    ----------
    input_image : PIL.Image.Image | None
        Image to process. ``None`` (e.g. the user clicked "Process" with
        no upload) is handled gracefully by returning ``(None, None)``.
    method : str
        "Segmentation Blur Model" — segment the foreground and blur only
        the background. "Monocular Depth Estimation Model" — blend the
        original with a fully blurred copy, weighted by estimated depth.
        Any other value passes the image through unchanged.
    blur_intensity : float
        Gaussian blur radius (sigma) in pixels.

    Returns
    -------
    tuple[PIL.Image.Image | None, PIL.Image.Image | None]
        ``(output_image, mask_image)`` — the composited result and the
        mask used (binary for segmentation, normalized depth map for the
        depth-based method).
    """
    # Guard the "no image uploaded" case instead of crashing with
    # AttributeError on .convert(None).
    if input_image is None:
        return None, None

    # Both models expect 3-channel RGB input.
    input_image = input_image.convert("RGB")

    if method == "Segmentation Blur Model":
        return _segmentation_blur(input_image, blur_intensity)
    if method == "Monocular Depth Estimation Model":
        return _depth_blur(input_image, blur_intensity)

    # Unknown method: pass the image through with a trivial grayscale mask.
    return input_image, input_image.convert("L")


def _segmentation_blur(image, blur_intensity):
    """Blur the background behind the segmented foreground object."""
    results = segmentation_model(image)
    # NOTE(review): assumes the last segment is the main foreground
    # object — this depends on the model's output ordering; confirm.
    foreground_mask = results[-1]["mask"].convert("L")
    # Threshold the soft mask into a hard binary matte.
    binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)

    blurred_background = image.filter(ImageFilter.GaussianBlur(radius=blur_intensity))
    # Keep foreground pixels (mask == 255); take the blurred copy elsewhere.
    output_image = Image.composite(image, blurred_background, binary_mask)
    return output_image, binary_mask


def _depth_blur(image, blur_intensity):
    """Blend original and blurred copies, weighted by estimated depth."""
    depth_map = depth_estimator(image)["depth"]

    # Normalize depth to [0, 1]; the epsilon avoids division by zero on a
    # perfectly flat depth map.
    depth_array = np.array(depth_map).astype(np.float32)
    norm = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
    normalized_depth = (norm * 255).astype(np.uint8)
    mask_image = Image.fromarray(normalized_depth)

    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=blur_intensity))

    orig_np = np.array(image).astype(np.float32)
    blur_np = np.array(blurred_image).astype(np.float32)
    # Add a trailing channel axis so the (H, W) mask broadcasts over RGB.
    alpha = normalized_depth[..., np.newaxis] / 255.0

    # alpha == 0 -> original pixel; alpha == 1 -> fully blurred pixel.
    blended_np = (1 - alpha) * orig_np + alpha * blur_np
    blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
    return Image.fromarray(blended_np), mask_image

# --- Gradio UI -------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Image Processing App: Segmentation & Depth-based Blur")

    with gr.Row():
        with gr.Column():
            # Left column: all user inputs.
            image_input = gr.Image(label="Input Image", type="pil")
            method_choice = gr.Radio(
                label="Processing Method",
                choices=["Segmentation Blur Model", "Monocular Depth Estimation Model"],
                value="Segmentation Blur Model",
            )
            blur_slider = gr.Slider(
                label="Blur Intensity (sigma)",
                minimum=1,
                maximum=30,
                step=1,
                value=15,
            )
            process_btn = gr.Button("Process Image")
        with gr.Column():
            # Right column: processed image plus the mask that produced it.
            result_display = gr.Image(label="Output Image")
            mask_display = gr.Image(label="Mask")

    # Run process_image on click, feeding the three inputs and filling
    # both output panes.
    process_btn.click(
        fn=process_image,
        inputs=[image_input, method_choice, blur_slider],
        outputs=[result_display, mask_display],
    )

# Start the web server.
demo.launch()