|
import os |
|
import torch |
|
from itertools import compress |
|
from detectron2.config import get_cfg |
|
from detectron2.data.detection_utils import read_image |
|
|
|
|
|
# Not referenced in the visible part of this module; presumably a display
# window title for visualisation -- TODO confirm against other users.
WINDOW_NAME = "GRiT"

# Absolute directory containing this file. Used further down to extend
# sys.path with the bundled centernet2 folder and to locate the config YAML.
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
|
from vbench.utils import CACHE_DIR |
|
|
|
|
|
|
|
import sys |
|
sys.path.append(os.path.join(CUR_DIR, './centernet2/')) |
|
from centernet.config import add_centernet_config |
|
|
|
from .grit.config import add_grit_config |
|
from .grit.predictor import VisualizationDemo |
|
|
|
class ObjDescription:
    """Container of per-object descriptions that can be indexed by a tensor.

    The wrapped sequence is exposed unchanged through the ``data`` attribute.
    """

    def __init__(self, object_descriptions):
        # Stored by reference (no copy is made).
        self.data = object_descriptions

    def __getitem__(self, item):
        """Return a new ``ObjDescription`` holding the selected entries.

        Args:
            item: A 1-D ``torch.Tensor`` (subclasses are accepted). With
                dtype ``torch.int64`` the values are positions into
                ``data``; with dtype ``torch.bool`` it is a keep-mask
                aligned with ``data``. An empty tensor of any dtype
                selects nothing.

        Returns:
            ObjDescription: a new container with the selected descriptions.

        Raises:
            AssertionError: if ``item`` is not a 1-D tensor, or is non-empty
                with a dtype other than ``int64``/``bool``.
        """
        assert isinstance(item, torch.Tensor), "index must be a torch.Tensor"
        assert item.dim() == 1, "index tensor must be 1-D"

        if len(item) == 0:
            # Empty selections are accepted regardless of dtype.
            return ObjDescription([])

        assert item.dtype == torch.int64 or item.dtype == torch.bool, \
            "index tensor must have dtype int64 or bool"

        # Convert once to plain Python values instead of calling .item()
        # (or truth-testing a 0-dim tensor) per element.
        selector = item.tolist()
        if item.dtype == torch.int64:
            return ObjDescription([self.data[position] for position in selector])
        return ObjDescription(list(compress(self.data, selector)))

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        return "ObjDescription({})".format(self.data)
|
|
|
def dense_pred_to_caption(predictions):
    """Flatten predictions into one ``"<description>: [ints]; "`` string.

    Args:
        predictions: Mapping whose ``"instances"`` entry exposes
            ``has(name)``, ``pred_boxes`` and
            ``pred_object_descriptions.data``.

    Returns:
        str: One ``"<description>: <list of ints>; "`` segment per object,
        concatenated in order. The ints come from the first row of each
        indexed box's ``tensor``. Empty string when there are no objects.
    """
    instances = predictions["instances"]
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None
    descriptions = instances.pred_object_descriptions.data

    segments = []
    for idx, description in enumerate(descriptions):
        # NOTE(review): ``boxes`` is None when "pred_boxes" is absent, so this
        # indexing fails if descriptions are present without boxes.
        first_row = boxes[idx].tensor.cpu().detach().numpy()[0]
        coords = [int(value) for value in first_row]
        segments.append(description + ": " + str(coords) + "; ")
    return "".join(segments)
|
|
|
def dense_pred_to_caption_only_name(predictions):
    """Join all object descriptions into one comma-separated string.

    Args:
        predictions: Mapping whose ``"instances"`` entry exposes
            ``pred_object_descriptions.data`` (an iterable of strings).

    Returns:
        str: The descriptions joined with ``","`` (no space after the comma).
    """
    descriptions = predictions["instances"].pred_object_descriptions.data
    return ",".join(descriptions)
|
|
|
def dense_pred_to_caption_tuple(predictions):
    """Convert predictions into a list of per-object tuples.

    Args:
        predictions: Mapping whose ``"instances"`` entry exposes
            ``has(name)``, ``pred_boxes``, ``pred_object_descriptions.data``
            and ``det_obj.data``.

    Returns:
        list[tuple]: One ``(description, ints, det_obj_data)`` tuple per
        description, where ``ints`` is the first row of the indexed box's
        ``tensor`` truncated to ints.
    """
    instances = predictions["instances"]
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None
    descriptions = instances.pred_object_descriptions.data
    detected_types = instances.det_obj.data

    # NOTE(review): every tuple carries the *whole* ``det_obj.data`` object
    # (the same reference), not its i-th element. Confirm what callers expect
    # before changing this.
    # NOTE(review): ``boxes`` is None when "pred_boxes" is absent, so the
    # indexing below fails if descriptions are present without boxes.
    return [
        (
            description,
            [int(value) for value in boxes[idx].tensor.cpu().detach().numpy()[0]],
            detected_types,
        )
        for idx, description in enumerate(descriptions)
    ]
|
|
|
def setup_cfg(args):
    """Build and freeze the config used for GRiT inference.

    Args:
        args: Dict with the keys ``"cpu"``, ``"config_file"``, ``"opts"``,
            ``"confidence_threshold"`` and ``"test_task"``.

    Returns:
        The frozen config object created by ``get_cfg()``.
    """
    cfg = get_cfg()
    if args["cpu"]:
        cfg.MODEL.DEVICE = "cpu"

    # Project-specific config extensions are applied before the file and
    # command-line style overrides are merged in.
    for extend_config in (add_centernet_config, add_grit_config):
        extend_config(cfg)
    cfg.merge_from_file(args["config_file"])
    cfg.merge_from_list(args["opts"])

    # The same threshold is written to both score-threshold settings.
    threshold = args["confidence_threshold"]
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = threshold

    # A falsy test task leaves whatever the merged config already specifies.
    task = args["test_task"]
    if task:
        cfg.MODEL.TEST_TASK = task

    # Fixed inference-time settings, applied after all merges.
    cfg.MODEL.BEAM_SIZE = 1
    cfg.MODEL.ROI_HEADS.SOFT_NMS_ENABLED = False
    cfg.USE_ACT_CHECKPOINT = False

    cfg.freeze()
    return cfg
|
|
|
|
|
def get_parser(device, model_weight=f"{CACHE_DIR}/grit_model/grit_b_densecap_objectdet.pth"):
    """Assemble the argument dict consumed by ``setup_cfg``.

    Args:
        device: Object with a ``type`` attribute; ``"cpu"`` turns on the
            ``'cpu'`` flag.
        model_weight: Checkpoint path passed through as the
            ``MODEL.WEIGHTS`` override. Defaults to the GRiT checkpoint
            under ``CACHE_DIR``.

    Returns:
        dict: Keys ``'config_file'``, ``'cpu'``, ``'confidence_threshold'``,
        ``'test_task'`` and ``'opts'``.
    """
    return {
        'config_file': f"{CUR_DIR}/configs/GRiT_B_DenseCap_ObjectDet.yaml",
        'cpu': device.type == "cpu",
        'confidence_threshold': 0.5,
        'test_task': 'DenseCap',
        'opts': ["MODEL.WEIGHTS", model_weight],
    }
|
|
|
def image_caption_api(image_src, device, model_weight):
    """Caption a single image file with GRiT dense captioning.

    A new ``VisualizationDemo`` is constructed on every call.

    Args:
        image_src: Path of the image to read. A falsy value (``None`` or
            ``""``) yields an empty caption without building the model.
        device: Object with a ``type`` attribute, forwarded to
            ``get_parser``.
        model_weight: Checkpoint path forwarded to ``get_parser``.

    Returns:
        str: The caption string produced by ``dense_pred_to_caption``, or
        ``""`` when no image source is given.
    """
    # Guard first: previously a falsy source reached ``return new_caption``
    # with the name unbound (UnboundLocalError), and only after the model
    # had already been built.
    if not image_src:
        return ""

    cfg = setup_cfg(get_parser(device, model_weight))
    demo = VisualizationDemo(cfg)

    img = read_image(image_src, format="BGR")
    # The second element of the result is not needed here.
    predictions, _ = demo.run_on_image(img)
    return dense_pred_to_caption(predictions)
|
|
|
def init_demo(device, model_weight, task="DenseCap"):
    """Create a ``VisualizationDemo`` configured for the given task.

    Args:
        device: Object with a ``type`` attribute, forwarded to
            ``get_parser``.
        model_weight: Checkpoint path forwarded to ``get_parser``.
        task: Test task name; any value other than ``"DenseCap"`` replaces
            the ``"test_task"`` entry produced by ``get_parser``.

    Returns:
        VisualizationDemo: the demo built from the resulting config.
    """
    parser_args = get_parser(device, model_weight)
    if task != "DenseCap":
        parser_args["test_task"] = task
    return VisualizationDemo(setup_cfg(parser_args))
|
|