Update mobile-sam.py
mobile-sam.py  CHANGED  (+163 -2)
@@ -1,3 +1,164 @@
 import os
-os.system('pip install
-
+#os.system('cd GroundingDINO && pip install -e. && cd .. && cd segment_anything && pip install -e. && cd ..')
+import cv2
+import gradio as gr
+from PIL import Image
+import numpy as np
+from sam_extension.utils import add_points_tag, add_boxes_tag, mask2greyimg
+from sam_extension.pipeline import SAMEncoderPipeline, SAMDecoderPipeline, GroundingDinoPipeline
+point_coords = []
+point_labels = []
+boxes = []
+boxes_point = []
+texts = []
+sam_encoder_pipeline = None
+sam_decoder_pipeline = None
+result_list = []
+result_index_list = []
+mask_result_list = []
+mask_result_index_list = []
+def resize(image, des_max=512):
+    h, w = image.shape[:2]
+    if h >= w:
+        new_h = des_max
+        new_w = int(des_max * w / h)
+    else:
+        new_w = des_max
+        new_h = int(des_max * h / w)
+    return cv2.resize(image, (new_w, new_h))
+def show_prompt(img, prompt_mode, pos_point, evt: gr.SelectData):  # SelectData is a subclass of EventData
+    global point_coords, point_labels, boxes_point, boxes
+    if prompt_mode == 'point':
+        point_coords.append([evt.index[0], evt.index[1]])
+        point_labels.append(1 if pos_point else 0)
+        result_img = add_points_tag(img, np.array(point_labels), np.array(point_coords))
+    elif prompt_mode == 'box':
+        boxes_point.append(evt.index[0])
+        boxes_point.append(evt.index[1])
+        if len(boxes_point) == 4:
+            boxes.append(boxes_point)
+            boxes_point = []
+        result_img = add_boxes_tag(img, np.array(boxes))
+    else:
+        result_img = img
+    return result_img, point_coords, point_labels, boxes_point, boxes
+
+def reset_points(img):
+    global point_coords, point_labels
+    point_coords = []
+    point_labels = []
+    return img, point_coords, point_labels
+
+
+def reset_boxes(img):
+    global boxes_point, boxes
+    boxes_point = []
+    boxes = []
+    return img, boxes_point, boxes
+
+def load_sam(sam_ckpt_path, sam_version):
+    global sam_encoder_pipeline, sam_decoder_pipeline
+    sam_encoder_pipeline = SAMEncoderPipeline.from_pretrained(ckpt_path=sam_ckpt_path, sam_version=sam_version, device='cpu')
+    sam_decoder_pipeline = SAMDecoderPipeline.from_pretrained(ckpt_path=sam_ckpt_path, sam_version=sam_version, device='cpu')
+    return 'sam loaded!'
+
+
+def generate_mask(img, prompt_mode, text_prompt):
+    global result_list, mask_result_list, result_index_list, mask_result_index_list
+    image = Image.fromarray(img)
+    img_size = sam_decoder_pipeline.img_size
+    des_img = image.resize((img_size, img_size))
+    sam_encoder_output = sam_encoder_pipeline(des_img)
+    if prompt_mode == 'point':
+        point_coords_ = np.array(point_coords)
+        point_labels_ = np.array(point_labels)
+        boxes_ = None
+        texts_ = None
+        grounding_dino_pipeline = None
+    elif prompt_mode == 'box':
+        point_coords_ = None
+        point_labels_ = None
+        boxes_ = np.array(boxes)
+        texts_ = None
+        grounding_dino_pipeline = None
+    else:
+        point_coords_ = None
+        point_labels_ = None
+        boxes_ = None
+        texts_ = text_prompt.split(',')
+        grounding_dino_pipeline = GroundingDinoPipeline.from_pretrained(
+            'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py',
+            'weights/groundingdino/groundingdino_swint_ogc.pth',
+            device='cpu')
+    result_list, mask_result_list, masks_list = sam_decoder_pipeline.visualize_results(
+        image,
+        des_img,
+        sam_encoder_output,
+        point_coords=point_coords_,
+        point_labels=point_labels_,
+        boxes=boxes_,
+        texts=texts_,
+        grounding_dino_pipeline=grounding_dino_pipeline,
+        multimask_output=True,
+        visualize_promts=True,
+        pil=False)
+    # result_index_list = [f'result_{i}' for i in range(len(result_list))]
+    # mask_result_index_list = [f'mask_result_{i}' for i in range(len(mask_result_list))]
+    return 'mask generated!', f'result_num : {len(result_list)}', f'mask_result_num : {len(masks_list)}'
+    # mask_grey_result_list = mask2greyimg(masks_list, False)
+
+
+def show_result(result_index):
+    return result_list[int(result_index)]
+
+
+def show_mask_result(mask_result_index):
+    return mask_result_list[int(mask_result_index)]
+
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        img = gr.Image(None, width=400, height=400, label='input_image', type='numpy')
+        result_img = gr.Image(None, width=400, height=400, label='output_image', type='numpy')
+    with gr.Row():
+        pos_point = gr.Checkbox(value=True, label='pos_point')
+        prompt_mode = gr.Dropdown(choices=['point', 'box', 'text'], value='point', label='prompt_mode')
+    with gr.Row():
+        point_coords_text = gr.Textbox(value=str(point_coords), interactive=True, label='point_coords')
+        point_labels_text = gr.Textbox(value=str(point_labels), interactive=True, label='point_labels')
+        reset_points_bu = gr.Button(value='reset_points')
+        reset_points_bu.click(fn=reset_points, inputs=[img], outputs=[result_img, point_coords_text, point_labels_text])
+    with gr.Row():
+        boxes_point_text = gr.Textbox(value=str(boxes_point), interactive=True, label='boxes_point')
+        boxes_text = gr.Textbox(value=str(boxes), interactive=True, label='boxes')
+        reset_boxes_bu = gr.Button(value='reset_boxes')
+        reset_boxes_bu.click(fn=reset_boxes, inputs=[img], outputs=[result_img, boxes_point_text, boxes_text])
+    with gr.Row():
+        text_prompt = gr.Textbox(value='', interactive=True, label='text_prompt')
+    with gr.Row():
+        sam_ckpt_path = gr.Dropdown(choices=['weights/sam/mobile_sam.pt'],
+                                    value='weights/sam/mobile_sam.pt',
+                                    label='SAM ckpt_path')
+        sam_version = gr.Dropdown(choices=['mobile_sam'],
+                                  value='mobile_sam',
+                                  label='SAM version')
+        load_sam_bu = gr.Button(value='load SAM')
+        sam_load_text = gr.Textbox(value='', interactive=True, label='sam_load')
+        load_sam_bu.click(fn=load_sam, inputs=[sam_ckpt_path, sam_version], outputs=sam_load_text)
+    with gr.Row():
+        result_num_text = gr.Textbox(value='', interactive=True, label='result_num')
+        result_index = gr.Number(value=0, label='result_index')
+        show_result_bu = gr.Button(value='show_result')
+        show_result_bu.click(fn=show_result, inputs=[result_index], outputs=[result_img])
+    with gr.Row():
+        mask_result_num_text = gr.Textbox(value='', interactive=True, label='mask_result_num')
+        mask_result_index = gr.Number(value=0, label='mask_result_index')
+        show_mask_result_bu = gr.Button(value='show_mask_result')
+        show_mask_result_bu.click(fn=show_mask_result, inputs=[mask_result_index], outputs=[result_img])
+    with gr.Row():
+        generate_masks_bu = gr.Button(value='SAM generate masks')
+        sam_text = gr.Textbox(value='', interactive=True, label='SAM')
+        generate_masks_bu.click(fn=generate_mask, inputs=[img, prompt_mode, text_prompt], outputs=[sam_text, result_num_text, mask_result_num_text])
+    img.select(show_prompt, [img, prompt_mode, pos_point], [result_img, point_coords_text, point_labels_text, boxes_point_text, boxes_text])
+if __name__ == '__main__':
+    demo.launch()
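
The core interaction added by this commit is Gradio's click-to-prompt pattern: `img.select` passes a `gr.SelectData` event whose `.index` holds the clicked (x, y) pixel, and those clicks accumulate into SAM point prompts. The snippet below is a minimal, self-contained sketch of just that pattern, not part of the commit; it assumes Gradio 4.x plus OpenCV, and uses a plain `cv2.circle` overlay in place of `sam_extension.utils.add_points_tag`, with no SAM pipelines involved.

# Illustrative sketch only: collect click coordinates as point prompts and draw them.
import cv2
import numpy as np
import gradio as gr

clicks = []   # accumulated (x, y) point prompts, like point_coords above
labels = []   # 1 = positive point, 0 = negative point, like point_labels above

def collect_click(image: np.ndarray, positive: bool, evt: gr.SelectData):
    # evt.index is the (x, y) pixel position of the click on the image component
    clicks.append((evt.index[0], evt.index[1]))
    labels.append(1 if positive else 0)
    vis = image.copy()
    for (x, y), lab in zip(clicks, labels):
        color = (0, 255, 0) if lab == 1 else (0, 0, 255)  # green = positive, red = negative
        cv2.circle(vis, (x, y), radius=5, color=color, thickness=-1)
    return vis, str(clicks), str(labels)

with gr.Blocks() as sketch:
    with gr.Row():
        inp = gr.Image(type='numpy', label='input_image')
        out = gr.Image(type='numpy', label='output_image')
    positive = gr.Checkbox(value=True, label='pos_point')
    coords_box = gr.Textbox(label='point_coords')
    labels_box = gr.Textbox(label='point_labels')
    inp.select(collect_click, [inp, positive], [out, coords_box, labels_box])

if __name__ == '__main__':
    sketch.launch()

In the actual app the accumulated arrays are then forwarded to `sam_decoder_pipeline.visualize_results` as `point_coords`/`point_labels`, while box mode buffers two clicks into a four-value box and text mode routes the prompt through GroundingDINO instead.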