# HaMeR/hamer/utils/utils_detectron2.py
# Initial commit d7a991a (geopavlakos)
import detectron2.data.transforms as T
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode, instantiate
from detectron2.data import MetadataCatalog
from omegaconf import OmegaConf
class DefaultPredictor_Lazy:
    """Create a simple end-to-end predictor with the given config that runs on a single
    device for a single input image.

    Compared to using the model directly, this class does the following additions:

    1. Load checkpoint from the weights specified in the config
       (``cfg.MODEL.WEIGHTS`` for a yacs config, ``train.init_checkpoint`` for a
       LazyConfig).
    2. Always take BGR image as the input and apply format conversion internally.
    3. Apply the test-time augmentation defined by the config
       (``cfg.INPUT.{MIN,MAX}_SIZE_TEST`` resizing for a yacs config, the
       augmentations of ``dataloader.test.mapper`` for a LazyConfig).
    4. Take one input image and produce a single output, instead of a batch.

    This is meant for simple demo purposes, so it does the above steps automatically.
    This is not meant for benchmarks or running complicated inference logic.
    If you'd like to do anything more complicated, please refer to its source code as
    examples to build and use the model manually.

    Attributes:
        metadata (Metadata): the metadata of the underlying dataset, obtained from
            the test dataset name in the config. Only set when the config names a
            test dataset.

    Examples:
    ::
        pred = DefaultPredictor_Lazy(cfg)
        inputs = cv2.imread("input.jpg")
        outputs = pred(inputs)
    """

    def __init__(self, cfg):
        """
        Args:
            cfg: a yacs CfgNode or an omegaconf dict object (LazyConfig).
        """
        # Bind up front so the metadata lookup at the end is well defined even
        # when a yacs config lists no test dataset.
        test_dataset = None

        if isinstance(cfg, CfgNode):
            # The module-level imports do not bring `build_model` into scope, so
            # import it here; otherwise this branch fails with a NameError.
            from detectron2.modeling import build_model

            self.cfg = cfg.clone()  # cfg can be modified by model
            self.model = build_model(self.cfg)
            if len(cfg.DATASETS.TEST):
                test_dataset = cfg.DATASETS.TEST[0]

            checkpointer = DetectionCheckpointer(self.model)
            checkpointer.load(cfg.MODEL.WEIGHTS)

            # Same value for both ends of the short-edge range.
            self.aug = T.ResizeShortestEdge(
                [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
            )

            self.input_format = cfg.INPUT.FORMAT
        else:  # new LazyConfig
            self.cfg = cfg
            self.model = instantiate(cfg.model)
            test_dataset = OmegaConf.select(cfg, "dataloader.test.dataset.names", default=None)
            # When several dataset names are given, use the first one.
            if isinstance(test_dataset, (list, tuple)):
                test_dataset = test_dataset[0]

            checkpointer = DetectionCheckpointer(self.model)
            checkpointer.load(OmegaConf.select(cfg, "train.init_checkpoint", default=""))

            # Reuse the test mapper's augmentations and image format so that
            # inference preprocessing follows the config.
            mapper = instantiate(cfg.dataloader.test.mapper)
            self.aug = mapper.augmentations
            self.input_format = mapper.image_format

        # NOTE: unconditionally moves the model to CUDA.
        self.model.eval().cuda()
        if test_dataset:
            self.metadata = MetadataCatalog.get(test_dataset)
        assert self.input_format in ["RGB", "BGR"], self.input_format

    def __call__(self, original_image):
        """
        Args:
            original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).

        Returns:
            predictions (dict):
                the output of the model for one image only.
                See :doc:`/tutorials/models` for details about the format.
        """
        with torch.no_grad():
            # Input is documented as BGR; reverse the channel axis when the
            # model expects RGB.
            if self.input_format == "RGB":
                original_image = original_image[:, :, ::-1]
            # Record the size before augmentation; it is handed to the model
            # alongside the augmented image (presumably so outputs can be mapped
            # back to the original resolution -- confirm against the model).
            height, width = original_image.shape[:2]
            image = self.aug(T.AugInput(original_image)).apply_image(original_image)
            # (H, W, C) -> (C, H, W) float32 tensor.
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

            inputs = {"image": image, "height": height, "width": width}
            # The model takes a list of inputs; unwrap the single result.
            predictions = self.model([inputs])[0]
            return predictions