File size: 3,549 Bytes
11a270e
 
38bf607
11a270e
52f5763
c580a28
 
52f5763
38bf607
52f5763
 
 
38bf607
52f5763
38bf607
52f5763
 
38bf607
52f5763
 
38bf607
52f5763
 
 
 
 
 
 
38bf607
52f5763
 
 
 
841589e
38bf607
841589e
52f5763
 
 
38bf607
52f5763
 
 
 
 
 
 
 
38bf607
52f5763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1ea0af
52f5763
c580a28
 
 
 
38bf607
 
 
c580a28
 
 
 
 
 
52f5763
 
38bf607
52f5763
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter
import numpy as np

# Module-level model singletons, instantiated once at import time.
# NOTE: each `pipeline(...)` call downloads/loads model weights, so importing
# this module is slow on first run and requires network access for the
# initial download.
# SegFormer-B1 fine-tuned on Cityscapes — used to extract a foreground mask.
segmentation_model = pipeline("image-segmentation", model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
# ZoeDepth (NYU+KITTI) — used to produce a per-pixel depth map for blending.
depth_estimator = pipeline("depth-estimation", model="Intel/zoedepth-nyu-kitti")

def process_image(input_image, method, blur_intensity):
    """
    Apply a background-blur ("portrait mode") effect to an image.

    Two methods are supported:

    1. "Segmentation Blur Model":
       - Uses segmentation to extract a foreground mask.
       - Applies Gaussian blur to the background.
       - Composites the final image.

    2. "Monocular Depth Estimation Model":
       - Uses depth estimation to generate a depth map.
       - Normalizes the depth map to be used as a blending mask.
       - Blends a fully blurred version with the original image.

    Any other *method* value returns the image unchanged (with a grayscale
    copy as the mask).

    Parameters:
       - input_image: PIL image to process (converted to RGB internally).
       - method: one of the two method names above.
       - blur_intensity: Gaussian blur radius (sigma) for the background.

    Returns:
       - output_image: final composited image.
       - mask_image: the mask used (binary for segmentation, normalized
         depth for depth-based).

    Raises:
       - ValueError: if no image was provided (Gradio passes None when the
         user clicks "Process Image" without uploading anything; previously
         this crashed with an AttributeError on .convert).
    """
    if input_image is None:
        raise ValueError("No input image provided. Please upload an image first.")

    input_image = input_image.convert("RGB")

    if method == "Segmentation Blur Model":
        return _segmentation_blur(input_image, blur_intensity)
    if method == "Monocular Depth Estimation Model":
        return _depth_blur(input_image, blur_intensity)

    # Unknown method: pass the image through untouched.
    return input_image, input_image.convert("L")


def _segmentation_blur(image, blur_intensity):
    # Blur everything behind the segmentation foreground mask.
    results = segmentation_model(image)
    # The last segment in the pipeline output is treated as the foreground
    # subject (assumes the pipeline lists the subject last — TODO confirm
    # if the segmentation model is ever swapped).
    foreground_mask = results[-1]["mask"].convert("L")
    # Threshold to a hard binary mask so the composite has crisp edges.
    binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)

    blurred_background = image.filter(ImageFilter.GaussianBlur(radius=blur_intensity))
    # Keep original pixels where the mask is white, blurred ones elsewhere.
    output_image = Image.composite(image, blurred_background, binary_mask)
    return output_image, binary_mask


def _depth_blur(image, blur_intensity):
    # Blend original and fully-blurred images, weighting blur by depth.
    depth_map = depth_estimator(image)["depth"]

    depth_array = np.array(depth_map).astype(np.float32)
    # Normalize depth to [0, 1]; the epsilon guards against division by
    # zero when the depth map is constant.
    norm = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
    normalized_depth = (norm * 255).astype(np.uint8)
    mask_image = Image.fromarray(normalized_depth)

    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=blur_intensity))

    orig_np = np.array(image).astype(np.float32)
    blur_np = np.array(blurred_image).astype(np.float32)
    # Per-pixel alpha in [0, 1]: larger depth values receive more blur.
    alpha = normalized_depth[..., np.newaxis] / 255.0

    blended_np = (1 - alpha) * orig_np + alpha * blur_np
    blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
    return Image.fromarray(blended_np), mask_image

# Gradio UI: a two-column layout — controls on the left, results on the
# right — wired to process_image via the "Process Image" button.
with gr.Blocks() as demo:
    gr.Markdown("## FocusFusion: Segmentation & Depth Blur")
    
    with gr.Row():
        with gr.Column():
            # Input controls: image upload, method selector, blur strength.
            # type="pil" makes Gradio hand process_image a PIL image
            # (or None when nothing has been uploaded).
            input_image = gr.Image(label="Input Image", type="pil")
            method = gr.Radio(label="Processing Method", 
                            choices=["Segmentation Blur Model", "Monocular Depth Estimation Model"],
                            value="Segmentation Blur Model")
            blur_intensity = gr.Slider(label="Blur Intensity (sigma)", 
                                    minimum=1, maximum=30, step=1, value=15)
            run_button = gr.Button("Process Image")
        with gr.Column():
            # Outputs: the composited result and the mask that produced it.
            output_image = gr.Image(label="Output Image")
            mask_output = gr.Image(label="Mask")
    
    # Button click runs process_image(input_image, method, blur_intensity)
    # and routes its (output, mask) tuple to the two output components.
    run_button.click(
        fn=process_image, 
        inputs=[input_image, method, blur_intensity], 
        outputs=[output_image, mask_output]
    )

# Start the Gradio server (blocking call).
demo.launch()