Spaces:
Running
Running
File size: 3,760 Bytes
6dc32ee 9fcd716 850cda3 cfccf84 3657d52 850cda3 3657d52 81b2e04 850cda3 9fcd716 cfccf84 3657d52 9fcd716 850cda3 3657d52 850cda3 3657d52 850cda3 746e19a 3657d52 9fcd716 6dc32ee 9fcd716 6dc32ee 3657d52 850cda3 3657d52 850cda3 5e1955d 850cda3 5e1955d 850cda3 5e1955d 850cda3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# GSL
import os

import cv2
import numpy as np
import spaces
import torch
from huggingface_hub import hf_hub_download
from PIL import Image, ImageChops, ImageEnhance
from segment_anything import build_sam, SamPredictor
from simple_lama_inpainting import SimpleLama
from transformers import pipeline
# Torch device shared by the module-level models: CUDA when available, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_groundingdino_model(device='cpu'):
    """Build the Grounding DINO zero-shot object-detection pipeline.

    Args:
        device: Device handed to the ``transformers`` pipeline.

    Returns:
        The pipeline object for ``IDEA-Research/grounding-dino-base``.
    """
    return pipeline(
        task="zero-shot-object-detection",
        model="IDEA-Research/grounding-dino-base",
        device=device,
    )
def load_sam_model(checkpoint_path, device='cpu'):
    """Load a SAM checkpoint and wrap it in a ``SamPredictor``.

    Args:
        checkpoint_path: Path of the SAM weights file passed to ``build_sam``.
        device: Device the SAM model is moved to before wrapping.

    Returns:
        A ``SamPredictor`` built around the loaded model.
    """
    sam = build_sam(checkpoint=checkpoint_path)
    sam_on_device = sam.to(device)
    return SamPredictor(sam_on_device)
# Module-level model instances, created once at import time and used by
# gsl_process_image below.
groundingdino_model = load_groundingdino_model(device=device)
# NOTE: the SAM checkpoint is read from a relative path.
sam_predictor = load_sam_model(checkpoint_path="models/sam_vit_h_4b8939.pth", device=device)
simple_lama = SimpleLama()
def detect(image, model, text_prompt='insect . flower . cloud', box_threshold=0.15, text_threshold=0.15):
    """Run zero-shot object detection for the classes named in *text_prompt*.

    Args:
        image: Image passed through unchanged as the first argument of *model*.
        model: Callable accepting ``(image, candidate_labels=..., threshold=...)``.
        text_prompt: Class names separated by ``.``; surrounding whitespace and
            empty entries (e.g. from a trailing dot) are ignored.
        box_threshold: Forwarded to *model* as ``threshold``.
        text_threshold: Unused; kept so existing callers keep working.

    Returns:
        Whatever *model* returns, or an empty list when the prompt contains no
        class name (the model is not called in that case).
    """
    # Each label is sent as "<name>." -- stripped so "a . b" and "a.b" yield
    # the same labels, and blank pieces never become a bare "." label.
    names = (piece.strip() for piece in text_prompt.split('.'))
    labels = [f"{name}." for name in names if name]
    if not labels:
        return []
    return model(image, candidate_labels=labels, threshold=box_threshold)
def segment(image, sam_model, boxes):
    """Predict SAM masks for a set of box prompts.

    Args:
        image: Array with shape ``(H, W, C)``; handed to ``sam_model.set_image``.
        sam_model: Predictor exposing ``set_image``, ``transform``,
            ``predict_torch`` and ``device`` (presumably a ``SamPredictor``).
        boxes: Sequence of ``[x0, y0, x1, y1]`` boxes expressed as fractions of
            the image width/height; they are scaled by ``(W, H, W, H)`` here.

    Returns:
        The masks returned by ``predict_torch``, moved to the CPU. With
        ``multimask_output=True`` SAM is expected to return three candidate
        masks per box, i.e. shape ``(N, 3, H, W)`` -- the empty result below
        mirrors that layout.
    """
    H, W, _ = image.shape
    # len() rather than truthiness: boxes may be an array or tensor.
    if len(boxes) == 0:
        # Nothing to segment; skip the expensive image embedding entirely.
        return torch.zeros((0, 3, H, W), dtype=torch.bool)

    sam_model.set_image(image)
    scale = torch.tensor([W, H, W, H], dtype=torch.float32)
    boxes_xyxy = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) * scale
    # Use the predictor's own device so the prompts always live where the
    # model does, independent of any module-level setting.
    transformed_boxes = sam_model.transform.apply_boxes_torch(
        boxes_xyxy.to(sam_model.device), image.shape[:2]
    )
    masks, _, _ = sam_model.predict_torch(
        point_coords=None,
        point_labels=None,
        boxes=transformed_boxes,
        multimask_output=True,
    )
    return masks.cpu()
def draw_mask(mask, image, random_color=True):
    """Alpha-blend a coloured rendering of *mask* over *image*.

    Args:
        mask: Mask whose last two dimensions are ``(h, w)``; it must support
            ``reshape`` and its product with a NumPy array must offer
            ``.numpy()`` (presumably a CPU torch tensor -- TODO confirm).
        image: Array accepted by ``Image.fromarray``.
        random_color: When true, pick random RGB with alpha 0.8; otherwise use
            the fixed colour (30, 144, 255) with alpha 0.6.

    Returns:
        The blended RGBA picture as a NumPy array.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.8])], axis=0)
        if random_color
        else np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    )
    height, width = mask.shape[-2:]
    # Broadcast the mask against the colour to get an (h, w, 4) overlay in [0, 1].
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    base_pil = Image.fromarray(image).convert("RGBA")
    overlay_u8 = (overlay.numpy() * 255).astype(np.uint8)
    overlay_pil = Image.fromarray(overlay_u8).convert("RGBA")
    blended = Image.alpha_composite(base_pil, overlay_pil)
    return np.array(blended)
def dilate_mask(mask, dilate_factor=15):
    """Dilate *mask* once with a square kernel of ones.

    Args:
        mask: Array with an ``astype`` method; it is cast to ``uint8`` first.
        dilate_factor: Side length of the square kernel.

    Returns:
        The result of ``cv2.dilate`` on the ``uint8`` mask.
    """
    mask_u8 = mask.astype(np.uint8)
    kernel = np.ones((dilate_factor, dilate_factor), np.uint8)
    return cv2.dilate(mask_u8, kernel, iterations=1)
@spaces.GPU
def gsl_process_image(image):
    """Show which parts of *image* are removed by detect -> segment -> inpaint.

    Steps: detect objects with the Grounding DINO pipeline, segment them with
    SAM, merge and dilate the masks, inpaint the masked area with LaMa, then
    compare the inpainted picture with the input.

    Args:
        image: A NumPy array, or anything ``np.array`` can convert (such as a
            PIL image). Presumably H x W x 3 RGB uint8 -- TODO confirm against
            the caller.

    Returns:
        A PIL RGB image of the input's size. Pixels whose grey-level difference
        between input and inpainted result exceeds the threshold keep their
        original colour; every other pixel is filled with (255, 236, 10). When
        nothing is detected the whole image is that fill colour.
    """
    fill_color = (255, 236, 10)
    diff_threshold = 7

    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image_pil = Image.fromarray(image)
    height, width = image.shape[:2]
    background = Image.new('RGB', image_pil.size, fill_color)

    detections = detect(image_pil, groundingdino_model)
    # The Hugging Face detection pipeline reports boxes in pixels, while
    # segment() scales its input by (W, H); hand it fractions so the boxes are
    # not scaled twice.
    boxes = [
        [
            d['box']['xmin'] / width,
            d['box']['ymin'] / height,
            d['box']['xmax'] / width,
            d['box']['ymax'] / height,
        ]
        for d in detections
    ]
    if not boxes:
        # Nothing detected means nothing to inpaint, hence nothing changed.
        return background

    masks = segment(image, sam_predictor, boxes)
    # Union of the first candidate mask of every box; unlike a pairwise loop
    # this also works when only a single object was found.
    final_mask = torch.any(masks[:, 0], dim=0)

    mask = final_mask.numpy().astype(np.uint8) * 255
    mask = dilate_mask(mask)
    result = simple_lama(image, Image.fromarray(mask))

    diff = ImageChops.difference(result, image_pil)
    changed = diff.convert('L').point(lambda p: 255 if p > diff_threshold else 0).convert('1')
    return Image.composite(image_pil, background, changed)
|