Spaces:
Running
Running
# GSL: GroundingDINO (detection) + SAM (segmentation) + LaMa (inpainting)
import os | |
import torch | |
import numpy as np | |
from PIL import Image, ImageChops, ImageEnhance | |
import cv2 | |
from simple_lama_inpainting import SimpleLama | |
from segment_anything import build_sam, SamPredictor | |
from transformers import pipeline | |
from huggingface_hub import hf_hub_download | |
# Compute device shared by the models below: CUDA when torch reports it as
# available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def load_groundingdino_model(device='cpu'):
    """Build the zero-shot object-detection pipeline used for detection.

    Args:
        device: Device specifier forwarded unchanged to ``pipeline``.

    Returns:
        The ``transformers`` pipeline object for the
        ``IDEA-Research/grounding-dino-base`` checkpoint.
    """
    return pipeline(
        task="zero-shot-object-detection",
        model="IDEA-Research/grounding-dino-base",
        device=device,
    )
def load_sam_model(checkpoint_path, device='cpu'):
    """Load a SAM checkpoint and wrap it in a ``SamPredictor``.

    Args:
        checkpoint_path: Path of the checkpoint file handed to ``build_sam``.
        device: Device the built model is moved to before wrapping.

    Returns:
        A ``SamPredictor`` around the loaded model.
    """
    sam = build_sam(checkpoint=checkpoint_path)
    sam_on_device = sam.to(device)
    return SamPredictor(sam_on_device)
# Instantiate the three models once, at import time, so every call to
# gsl_process_image() reuses them.
groundingdino_model = load_groundingdino_model(device=device)
sam_predictor = load_sam_model(
    checkpoint_path="models/sam_vit_h_4b8939.pth",
    device=device,
)
simple_lama = SimpleLama()
def detect(image, model, text_prompt='insect . flower . cloud', box_threshold=0.15, text_threshold=0.15):
    """Run zero-shot object detection for the classes named in ``text_prompt``.

    The prompt is a ``.``-separated list of class names. Each name is
    stripped of surrounding whitespace, lower-cased and terminated with a
    single dot; empty segments (e.g. from a trailing dot) are dropped so no
    bare ``'.'`` label is sent to the model.

    Args:
        image: Image forwarded unchanged as the first argument of ``model``.
        model: Callable invoked as
            ``model(image, candidate_labels=..., threshold=...)``.
        text_prompt: Dot-separated class names.
        box_threshold: Score threshold forwarded as ``threshold``.
        text_threshold: Accepted for backward compatibility only; it is not
            forwarded to ``model``.

    Returns:
        Whatever ``model`` returns.
    """
    # str.split('.') removes the dots but keeps the blanks around each name,
    # so normalise every segment before re-appending the terminator.
    names = (segment_text.strip().lower() for segment_text in text_prompt.split('.'))
    labels = [f"{name}." for name in names if name]
    return model(image, candidate_labels=labels, threshold=box_threshold)
def segment(image, sam_model, boxes):
    """Predict SAM masks for a set of boxes on one image.

    Args:
        image: Array whose first two dimensions are (height, width); it is
            passed to ``sam_model.set_image``.
        sam_model: Predictor exposing ``set_image``, ``transform``,
            ``device`` and ``predict_torch`` (a ``SamPredictor``).
        boxes: Sequence of ``[xmin, ymin, xmax, ymax]`` boxes. They are
            multiplied by ``[W, H, W, H]`` below, i.e. they are expected as
            fractions of the image size, not pixels.

    Returns:
        The masks from ``predict_torch`` moved to the CPU. With
        ``multimask_output=True`` each box gets several candidate masks.
        When ``boxes`` is empty, an empty boolean tensor of shape
        ``(0, 3, H, W)`` is returned without touching ``sam_model``.
    """
    H, W = image.shape[:2]

    # len() rather than truthiness: boxes may be a list, array or tensor.
    if len(boxes) == 0:
        # An empty box list cannot be broadcast against the 4-element scale
        # vector, so short-circuit with an empty result instead of crashing.
        return torch.zeros((0, 3, H, W), dtype=torch.bool)

    sam_model.set_image(image)

    scale = torch.tensor([W, H, W, H], dtype=torch.float32)
    boxes_xyxy = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) * scale

    # Put the boxes on the predictor's own device so the function does not
    # depend on a module-level global matching where the model was loaded.
    transformed_boxes = sam_model.transform.apply_boxes_torch(
        boxes_xyxy.to(sam_model.device), image.shape[:2]
    )
    masks, _, _ = sam_model.predict_torch(
        point_coords=None,
        point_labels=None,
        boxes=transformed_boxes,
        multimask_output=True,
    )
    return masks.cpu()
def draw_mask(mask, image, random_color=True):
    """Alpha-composite a translucent colored mask over an image.

    Args:
        mask: Mask whose last two dimensions are (height, width). The code
            calls ``.reshape`` on it and ``.numpy()`` on the product, so it
            is presumably a torch tensor - verify against callers.
        image: Array accepted by ``Image.fromarray``.
        random_color: When true, use a random RGB color with alpha 0.8;
            otherwise a fixed blue (30, 144, 255) with alpha 0.6.

    Returns:
        The composited RGBA image as a numpy array.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.8])], axis=0)
        if random_color
        else np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    )

    height, width = mask.shape[-2:]
    # Broadcast the (h, w, 1) mask against the (1, 1, 4) color to get a
    # per-pixel RGBA overlay that is zero outside the mask.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    overlay_bytes = (overlay.numpy() * 255).astype(np.uint8)

    base_rgba = Image.fromarray(image).convert("RGBA")
    overlay_rgba = Image.fromarray(overlay_bytes).convert("RGBA")
    composited = Image.alpha_composite(base_rgba, overlay_rgba)
    return np.array(composited)
def dilate_mask(mask, dilate_factor=15):
    """Grow a mask with a single pass of square-kernel dilation.

    Args:
        mask: Array with an ``astype`` method; it is cast to ``uint8``
            before dilation.
        dilate_factor: Side length of the all-ones square kernel.

    Returns:
        The dilated ``uint8`` mask produced by ``cv2.dilate``.
    """
    kernel = np.ones((dilate_factor, dilate_factor), np.uint8)
    return cv2.dilate(mask.astype(np.uint8), kernel, iterations=1)
def _union_of_primary_masks(masks):
    """Return the pixel-wise OR of the first candidate mask of every box as an (H, W) bool array."""
    return masks[:, 0].bool().any(dim=0).cpu().numpy()


def gsl_process_image(image):
    """Detect objects, inpaint them away, and return only what was removed.

    Pipeline: detect boxes with ``groundingdino_model``, segment them with
    ``sam_predictor``, merge and dilate the masks, inpaint the masked region
    with ``simple_lama``, then keep the pixels that the inpainting changed
    and paint everything else with a flat (255, 236, 10) background.

    Args:
        image: A numpy array or anything ``np.array`` can convert (e.g. a
            PIL image).

    Returns:
        An RGB PIL image. Pixels whose grayscale difference between the
        original and the inpainted result exceeds the threshold show the
        original image; all others show the background color. When nothing
        is detected the whole image is background.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # Work in RGB throughout: ImageChops.difference needs both operands in
    # the same mode, and the background below is RGB.
    image_pil = Image.fromarray(image).convert("RGB")
    image = np.array(image_pil)
    background = Image.new('RGB', image_pil.size, (255, 236, 10))

    detections = detect(image_pil, groundingdino_model)
    if not detections:
        # Nothing to remove, so nothing differs from the original.
        return background

    # The detection pipeline reports boxes in absolute pixels (see the
    # transformers zero-shot-object-detection docs), while segment() scales
    # its input by the image size, so convert to fractions here.
    height, width = image.shape[:2]
    boxes = [
        [
            d['box']['xmin'] / width,
            d['box']['ymin'] / height,
            d['box']['xmax'] / width,
            d['box']['ymax'] / height,
        ]
        for d in detections
    ]
    segmented_frame_masks = segment(image, sam_predictor, boxes)

    # Union over *all* boxes; this also covers the single-detection case.
    mask = _union_of_primary_masks(segmented_frame_masks).astype(np.uint8) * 255
    mask = dilate_mask(mask)
    dilated_image_mask_pil = Image.fromarray(mask)

    result = simple_lama(image, dilated_image_mask_pil)
    # NOTE(review): some simple-lama-inpainting versions appear to return an
    # image padded on the right/bottom to a multiple of 8 - confirm. The crop
    # is skipped when the sizes already match.
    if result.size != image_pil.size:
        result = result.crop((0, 0, image_pil.size[0], image_pil.size[1]))

    diff = ImageChops.difference(result, image_pil)
    threshold = 7
    # Binary "changed" map: white where the inpainting altered the pixel.
    changed = diff.convert('L').point(lambda p: 255 if p > threshold else 0).convert('1')
    return Image.composite(image_pil, background, changed)