Spaces:

huzey
/

ncut-pytorch

Sleeping

App Files Files Community

huzey committed on Oct 9, 2024

Commit

03a488b

1 Parent(s): 369bb85

update mask logic

Browse files

Files changed (1) hide show

app.py +39 -37

app.py CHANGED Viewed

@@ -3134,10 +3134,10 @@ with demo:
                     image3_slider = gr.Slider(0, 100, step=1, label="Image#3 Index", value=2, elem_id="image3_slider", interactive=True)
                 load_one_image_button = gr.Button("🔴 Load", elem_id="load_one_image_button", variant='primary')
                 gr.Markdown("### Step 2b: Draw Points")
-                gr.Markdown("##### 🖱️ Left Click: Foreground")
-                gr.Markdown("##### 🖱️ Middle Click: Background")
                 gr.Markdown("""
                     <h5>
                     Top Right Buttons: </br>
                     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
                     stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
@@ -3154,6 +3154,8 @@ with demo:
                     <path stroke="currentColor" stroke-linecap="round" stroke-width="1.5" d="M9 21h12"></path></g>
                     </svg> :
                     Clear All Points
                     </h5>
                 """)
                 prompt_image1 = ImagePrompter(show_label=False, elem_id="prompt_image1", interactive=False)
@@ -3194,7 +3196,9 @@ with demo:
                 mask_gallery = gr.Gallery(value=[], label="Segmentation Masks", show_label=True, elem_id="mask_gallery", columns=[3], rows=[1], object_fit="contain", height="auto", show_share_button=True, interactive=False)
                 run_crop_button = gr.Button("🔴 RUN", elem_id="run_crop_button", variant='primary')
                 add_download_button(mask_gallery, "mask")
-                distance_threshold_slider = gr.Slider(0, 1, step=0.01, label="Mask Threshold", value=0.5, elem_id="distance_threshold", info="increase for smaller mask")
                 # filter_small_area_checkbox = gr.Checkbox(label="Noise Reduction", value=True, elem_id="filter_small_area_checkbox")
                 distance_power_slider = gr.Slider(-3, 3, step=0.01, label="Distance Power", value=0.5, elem_id="distance_power", info="d = d^p", visible=False)
                 crop_gallery = gr.Gallery(value=[], label="Cropped Images", show_label=True, elem_id="crop_gallery", columns=[3], rows=[1], object_fit="contain", height="auto", show_share_button=True, interactive=False)
@@ -3234,7 +3238,7 @@ with demo:
                     return rgbs
                 def run_crop(original_images, ncut_images, prompts1, prompts2, prompts3, image_idx1, image_idx2, image_idx3,
-                            crop_expand, distance_threshold, distance_power, area_threshold):
                     ncut_images = [image[0] for image in ncut_images]
                     if len(ncut_images) == 0:
                         return []
@@ -3252,30 +3256,6 @@ with demo:
                     h, w = ncut_images[0].shape[:2]
                     ncut_pixels = torch.tensor(np.array(ncut_pixels).reshape(-1, 3)) / 255
                     # normalized_ncut_pixels = F.normalize(ncut_pixels, p=2, dim=-1)
-                    positive_distances = []
-                    negative_distances = []
-                    for rgb, is_positive in rgbs:
-                        rgb = torch.tensor(rgb).float() / 255
-                        # rgb = F.normalize(rgb, p=2, dim=-1)
-                        distance = (ncut_pixels - rgb[None]).norm(dim=-1)
-                        distance = distance.squeeze(-1)
-                        if is_positive:
-                            positive_distances.append(distance)
-                        else:
-                            negative_distances.append(distance)
-                    if len(positive_distances) == 0:
-                        raise gr.Error("No prompt points. Please draw some points on the image.")
-                    positive_distances = torch.stack(positive_distances)
-                    negative_flag = len(negative_distances) > 0
-                    if len(negative_distances) == 0:
-                        negative_distances = positive_distances * 0  # dummy
-                    else:
-                        negative_distances = torch.stack(negative_distances)
-                    positive_distance = positive_distances.min(dim=0).values
-                    negative_distance = negative_distances.min(dim=0).values
-                    # positive_distance = positive_distances.mean(dim=0)
-                    # negative_distance = negative_distances.mean(dim=0)
                     def to_mask(heatmap, threshold):
                         heatmap = 1 / (heatmap + 1e-6)
@@ -3289,13 +3269,26 @@ with demo:
                         mask = heatmap > threshold
                         return mask
-                    positive_mask = to_mask(positive_distance, distance_threshold)
-                    if negative_flag:
-                        negative_mask = to_mask(negative_distance, distance_threshold)
                         positive_mask = positive_mask & ~negative_mask
-                    #convert to PIL
                     mask = positive_mask.cpu().numpy()
                     mask = mask.astype(np.uint8) * 255
                     mask = [Image.fromarray(mask[i]) for i in range(len(mask))]
@@ -3343,7 +3336,12 @@ with demo:
                         return bounding_boxes, cleaned_pil_mask
                     bboxs, filtered_masks = zip(*[get_bboxes_and_clean_mask(_mask) for _mask in mask])
                     # combine the masks, also draw the bounding boxes
                     combined_masks = []
                     for i_image in range(len(mask)):
@@ -3352,6 +3350,13 @@ with demo:
                         clean_mask = np.array(filtered_masks[i_image].convert("RGB"))
                         combined_mask = noisy_mask * 0.4 + clean_mask
                         combined_mask = np.clip(combined_mask, 0, 255).astype(np.uint8)
                         for x, y, w, h in bbox:
                             cv2.rectangle(combined_mask, (x-1, y-1), (x + w+2, y + h+2), (255, 0, 0), 2)
                         combined_mask = Image.fromarray(combined_mask)
@@ -3381,10 +3386,6 @@ with demo:
                         crop = image.crop((_x, _y, _x + _w, _y + _h))
                         return crop
-                    original_images = [image[0] for image in original_images]
-                    if isinstance(original_images[0], str):
-                        original_images = [Image.open(image) for image in original_images]
                     mask_h, mask_w = filtered_masks[0].size
                     cropped_images = []
                     for _image, _bboxs in zip(original_images, bboxs):
@@ -3395,7 +3396,8 @@ with demo:
                 run_crop_button.click(run_crop,
                     inputs=[input_gallery, output_gallery, prompt_image1, prompt_image2, prompt_image3, image1_slider, image2_slider, image3_slider,
-                            crop_expand_slider, distance_threshold_slider, distance_power_slider, area_threshold_slider],
                     outputs=[mask_gallery, crop_gallery])

                     image3_slider = gr.Slider(0, 100, step=1, label="Image#3 Index", value=2, elem_id="image3_slider", interactive=True)
                 load_one_image_button = gr.Button("🔴 Load", elem_id="load_one_image_button", variant='primary')
                 gr.Markdown("### Step 2b: Draw Points")
                 gr.Markdown("""
                     <h5>
+                    🖱️ Left Click: Foreground </br>
+                    🖱️ Middle Click: Background </br></br>
                     Top Right Buttons: </br>
                     <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none"
                     stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"
                     <path stroke="currentColor" stroke-linecap="round" stroke-width="1.5" d="M9 21h12"></path></g>
                     </svg> :
                     Clear All Points
+                    </br>
+                    (Known issue: please manually clear the points after loading new image)
                     </h5>
                 """)
                 prompt_image1 = ImagePrompter(show_label=False, elem_id="prompt_image1", interactive=False)
                 mask_gallery = gr.Gallery(value=[], label="Segmentation Masks", show_label=True, elem_id="mask_gallery", columns=[3], rows=[1], object_fit="contain", height="auto", show_share_button=True, interactive=False)
                 run_crop_button = gr.Button("🔴 RUN", elem_id="run_crop_button", variant='primary')
                 add_download_button(mask_gallery, "mask")
+                distance_threshold_slider = gr.Slider(0, 1, step=0.01, label="Mask Threshold (Foreground)", value=0.5, elem_id="distance_threshold", info="increase for smaller mask")
+                negative_distance_threshold_slider = gr.Slider(0, 1, step=0.01, label="Mask Threshold (Background)", value=0.5, elem_id="distance_threshold", info="increase for smaller mask")
+                overlay_image_checkbox = gr.Checkbox(label="Overlay Original Image", value=True, elem_id="overlay_image_checkbox")
                 # filter_small_area_checkbox = gr.Checkbox(label="Noise Reduction", value=True, elem_id="filter_small_area_checkbox")
                 distance_power_slider = gr.Slider(-3, 3, step=0.01, label="Distance Power", value=0.5, elem_id="distance_power", info="d = d^p", visible=False)
                 crop_gallery = gr.Gallery(value=[], label="Cropped Images", show_label=True, elem_id="crop_gallery", columns=[3], rows=[1], object_fit="contain", height="auto", show_share_button=True, interactive=False)
                     return rgbs
                 def run_crop(original_images, ncut_images, prompts1, prompts2, prompts3, image_idx1, image_idx2, image_idx3,
+                            crop_expand, distance_threshold, distance_power, area_threshold, overlay_image, negative_distance_threshold):
                     ncut_images = [image[0] for image in ncut_images]
                     if len(ncut_images) == 0:
                         return []
                     h, w = ncut_images[0].shape[:2]
                     ncut_pixels = torch.tensor(np.array(ncut_pixels).reshape(-1, 3)) / 255
                     # normalized_ncut_pixels = F.normalize(ncut_pixels, p=2, dim=-1)
                     def to_mask(heatmap, threshold):
                         heatmap = 1 / (heatmap + 1e-6)
                         mask = heatmap > threshold
                         return mask
+                    positive_masks, negative_masks = [], []
+                    for rgb, is_positive in rgbs:
+                        rgb = torch.tensor(rgb).float() / 255
+                        distance = (ncut_pixels - rgb[None]).norm(dim=-1)
+                        distance = distance.squeeze(-1)
+                        if is_positive:
+                            positive_masks.append(to_mask(distance, distance_threshold))
+                        else:
+                            negative_masks.append(to_mask(distance, negative_distance_threshold))
+                    if len(positive_masks) == 0:
+                        raise gr.Error("No prompt points. Please draw some points on the image.")
+                    positive_masks = torch.stack(positive_masks)
+                    positive_mask = positive_masks.any(dim=0)
+                    if len(negative_masks) > 0:
+                        negative_masks = torch.stack(negative_masks)
+                        negative_mask = negative_masks.any(dim=0)
                         positive_mask = positive_mask & ~negative_mask
+                    # convert to PIL
                     mask = positive_mask.cpu().numpy()
                     mask = mask.astype(np.uint8) * 255
                     mask = [Image.fromarray(mask[i]) for i in range(len(mask))]
                         return bounding_boxes, cleaned_pil_mask
                     bboxs, filtered_masks = zip(*[get_bboxes_and_clean_mask(_mask) for _mask in mask])
+                    original_images = [image[0] for image in original_images]
+                    if isinstance(original_images[0], str):
+                        original_images = [Image.open(image) for image in original_images]
                     # combine the masks, also draw the bounding boxes
                     combined_masks = []
                     for i_image in range(len(mask)):
                         clean_mask = np.array(filtered_masks[i_image].convert("RGB"))
                         combined_mask = noisy_mask * 0.4 + clean_mask
                         combined_mask = np.clip(combined_mask, 0, 255).astype(np.uint8)
+                        if overlay_image:
+                            combined_mask[:, :, 0] = 0  # remove red channel
+                            combined_mask[:, :, 1] = 0  # remove green channel
+                            _image = original_images[i_image].convert("RGB").resize((combined_mask.shape[1], combined_mask.shape[0]))
+                            _image = np.array(_image)
+                            combined_mask = 0.5 * combined_mask + 0.5 * _image
+                            combined_mask = np.clip(combined_mask, 0, 255).astype(np.uint8)
                         for x, y, w, h in bbox:
                             cv2.rectangle(combined_mask, (x-1, y-1), (x + w+2, y + h+2), (255, 0, 0), 2)
                         combined_mask = Image.fromarray(combined_mask)
                         crop = image.crop((_x, _y, _x + _w, _y + _h))
                         return crop
                     mask_h, mask_w = filtered_masks[0].size
                     cropped_images = []
                     for _image, _bboxs in zip(original_images, bboxs):
                 run_crop_button.click(run_crop,
                     inputs=[input_gallery, output_gallery, prompt_image1, prompt_image2, prompt_image3, image1_slider, image2_slider, image3_slider,
+                            crop_expand_slider, distance_threshold_slider, distance_power_slider,
+                            area_threshold_slider, overlay_image_checkbox, negative_distance_threshold_slider],
                     outputs=[mask_gallery, crop_gallery])