import os
import json
import logging
import itertools

import detectron2.utils.comm as comm
from detectron2.evaluation.evaluator import DatasetEvaluator

from caption_pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap


class CaptioningEvaluator(DatasetEvaluator):
    """
    Evaluate image captioning predictions with the COCO caption metrics
    (BLEU, METEOR, ROUGE-L, CIDEr, SPICE) computed by pycocoevalcap.
    Generated captions are gathered from all workers (when ``distributed`` is True),
    dumped to a COCO-format caption result file, and scored against the
    ground-truth captions with ``COCOEvalCap``.
    """

    def __init__(
        self,
        distributed=True,
        output_dir=None,
        gt_json=None,
    ):
        """
        Args:
            distributed (bool): if True, will collect results from all ranks and run
                evaluation in the main process.
                Otherwise, will only evaluate the results in the current process.
            output_dir (str): an output directory to dump the generated captions,
                written as "results.json" in COCO's caption result format.
            gt_json (str): path to the COCO-format annotation file that holds the
                ground-truth captions for the evaluated split.
        """
        self._logger = logging.getLogger(__name__)
        self._distributed = distributed
        self._output_dir = output_dir
        # Load the ground-truth captions once; they are reused by every evaluate() call.
        self._gt_json = COCO(gt_json)
|
    def reset(self):
        self._gen_captions = []
        self._image_ids = []
|
    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to the model. It is a list of dicts. Each dict
                corresponds to an image and contains keys like "height", "width",
                "file_name", "image_id".
            outputs: the outputs of the model. It is a list of dicts, each with the
                keys "image_id" and "captioning_text" (the generated caption).
        """
        for output in outputs:
            self._image_ids.append(output['image_id'])
            self._gen_captions.append(output['captioning_text'])
|
    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Defaults to None for the
                whole dataset. Currently unused by this evaluator.
        """
        if self._distributed:
            comm.synchronize()

            def gather(x, move=False):
                # Gather the per-rank lists on the main process and flatten them.
                # (The `move` branch assumes tensor outputs and is unused for text captions.)
                x = comm.gather(x)
                x = list(itertools.chain(*x))
                if move:
                    x = [xx.to(self._gen_captions[0].device) for xx in x]
                return x

            gen_captions = gather(self._gen_captions)
            image_ids = gather(self._image_ids)
            if not comm.is_main_process():
                return {}
        else:
            gen_captions = self._gen_captions
            image_ids = self._image_ids

        assert len(gen_captions) == len(image_ids)
        pred_captions = [
            {"image_id": image_id, "caption": gen_caption}
            for image_id, gen_caption in zip(image_ids, gen_captions)
        ]
        # Dump the predictions in COCO's caption result format, then score them
        # against the ground-truth annotations with the COCO caption metrics.
        pred_pth = os.path.join(self._output_dir, 'results.json')
        with open(pred_pth, "w") as f:
            json.dump(pred_captions, f)

        gt_captions = self._gt_json
        pred_captions = gt_captions.loadRes(pred_pth)

        cocoEval = COCOEvalCap(gt_captions, pred_captions)
        cocoEval.params['image_id'] = pred_captions.getImgIds()
        cocoEval.evaluate()
        return cocoEval.eval
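

# Minimal usage sketch (illustrative only): the output directory, annotation path,
# and prediction dicts below are hypothetical placeholders, not part of the evaluator.
# It shows the expected reset() -> process() -> evaluate() flow for a single image.
if __name__ == "__main__":
    os.makedirs("./output", exist_ok=True)
    evaluator = CaptioningEvaluator(
        distributed=False,
        output_dir="./output",                        # hypothetical output directory
        gt_json="annotations/captions_val2017.json",  # hypothetical COCO caption file
    )
    evaluator.reset()
    evaluator.process(
        inputs=[{"image_id": 42, "file_name": "000000000042.jpg"}],
        outputs=[{"image_id": 42, "captioning_text": "a dog running on the grass"}],
    )
    # Returns a dict of caption metrics, e.g. {"Bleu_4": ..., "CIDEr": ...}.
    metrics = evaluator.evaluate()
    print(metrics)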