KyanChen committed
Commit 4d0eb62 · 1 Parent(s): 1c3eb47

Upload 303 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +2 -0
  2. mmpretrain/__init__.py +28 -0
  3. mmpretrain/annotations/WHU_building_test.json +3 -0
  4. mmpretrain/annotations/WHU_building_train.json +3 -0
  5. mmpretrain/annotations/WHU_building_val.json +0 -0
  6. mmpretrain/apis/__init__.py +22 -0
  7. mmpretrain/apis/base.py +390 -0
  8. mmpretrain/apis/feature_extractor.py +128 -0
  9. mmpretrain/apis/image_caption.py +164 -0
  10. mmpretrain/apis/image_classification.py +221 -0
  11. mmpretrain/apis/image_retrieval.py +285 -0
  12. mmpretrain/apis/model.py +408 -0
  13. mmpretrain/apis/multimodal_retrieval.py +603 -0
  14. mmpretrain/apis/nlvr.py +150 -0
  15. mmpretrain/apis/utils.py +270 -0
  16. mmpretrain/apis/visual_grounding.py +180 -0
  17. mmpretrain/apis/visual_question_answering.py +181 -0
  18. mmpretrain/datasets/__init__.py +54 -0
  19. mmpretrain/datasets/base_dataset.py +219 -0
  20. mmpretrain/datasets/builder.py +25 -0
  21. mmpretrain/datasets/caltech101.py +113 -0
  22. mmpretrain/datasets/categories.py +1440 -0
  23. mmpretrain/datasets/cifar.py +210 -0
  24. mmpretrain/datasets/coco_caption.py +42 -0
  25. mmpretrain/datasets/coco_retrieval.py +77 -0
  26. mmpretrain/datasets/coco_vqa.py +114 -0
  27. mmpretrain/datasets/cub.py +142 -0
  28. mmpretrain/datasets/custom.py +287 -0
  29. mmpretrain/datasets/dataset_wrappers.py +176 -0
  30. mmpretrain/datasets/dtd.py +116 -0
  31. mmpretrain/datasets/fgvcaircraft.py +98 -0
  32. mmpretrain/datasets/flamingo.py +295 -0
  33. mmpretrain/datasets/flowers102.py +104 -0
  34. mmpretrain/datasets/food101.py +102 -0
  35. mmpretrain/datasets/imagenet.py +102 -0
  36. mmpretrain/datasets/inshop.py +157 -0
  37. mmpretrain/datasets/mnist.py +220 -0
  38. mmpretrain/datasets/multi_label.py +85 -0
  39. mmpretrain/datasets/multi_task.py +337 -0
  40. mmpretrain/datasets/nlvr2.py +36 -0
  41. mmpretrain/datasets/oxfordiiitpet.py +97 -0
  42. mmpretrain/datasets/places205.py +40 -0
  43. mmpretrain/datasets/refcoco.py +81 -0
  44. mmpretrain/datasets/samplers/__init__.py +5 -0
  45. mmpretrain/datasets/samplers/repeat_aug.py +101 -0
  46. mmpretrain/datasets/samplers/sequential.py +56 -0
  47. mmpretrain/datasets/scienceqa.py +104 -0
  48. mmpretrain/datasets/stanfordcars.py +148 -0
  49. mmpretrain/datasets/sun397.py +225 -0
  50. mmpretrain/datasets/transforms/__init__.py +36 -0
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  data/WHU/annotations/WHU_building_test.json filter=lfs diff=lfs merge=lfs -text
  data/WHU/annotations/WHU_building_train.json filter=lfs diff=lfs merge=lfs -text
+ mmpretrain/annotations/WHU_building_test.json filter=lfs diff=lfs merge=lfs -text
+ mmpretrain/annotations/WHU_building_train.json filter=lfs diff=lfs merge=lfs -text
mmpretrain/__init__.py ADDED
@@ -0,0 +1,28 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import mmcv
+ import mmengine
+ from mmengine.utils import digit_version
+
+ from .apis import *  # noqa: F401, F403
+ from .version import __version__
+
+ mmcv_minimum_version = '2.0.0rc4'
+ mmcv_maximum_version = '2.1.0'
+ mmcv_version = digit_version(mmcv.__version__)
+
+ mmengine_minimum_version = '0.7.1'
+ mmengine_maximum_version = '1.0.0'
+ mmengine_version = digit_version(mmengine.__version__)
+
+ assert (mmcv_version >= digit_version(mmcv_minimum_version)
+         and mmcv_version < digit_version(mmcv_maximum_version)), \
+     f'MMCV=={mmcv.__version__} is used but incompatible. ' \
+     f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.'
+
+ assert (mmengine_version >= digit_version(mmengine_minimum_version)
+         and mmengine_version < digit_version(mmengine_maximum_version)), \
+     f'MMEngine=={mmengine.__version__} is used but incompatible. ' \
+     f'Please install mmengine>={mmengine_minimum_version}, ' \
+     f'<{mmengine_maximum_version}.'
+
+ __all__ = ['__version__']
mmpretrain/annotations/WHU_building_test.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5845dd19a3ec84aa3bc978ad5dc8066b43569c4ac9ff12c954d96208ec13432
+ size 13511169
mmpretrain/annotations/WHU_building_train.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28c490b7c80e6900a5b4da522faee91c6251589a0c9ebb258e79221c2586d2fa
+ size 42910976
mmpretrain/annotations/WHU_building_val.json ADDED
The diff for this file is too large to render. See raw diff
 
mmpretrain/apis/__init__.py ADDED
@@ -0,0 +1,22 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .base import BaseInferencer
+ from .feature_extractor import FeatureExtractor
+ from .image_caption import ImageCaptionInferencer
+ from .image_classification import ImageClassificationInferencer
+ from .image_retrieval import ImageRetrievalInferencer
+ from .model import (ModelHub, get_model, inference_model, init_model,
+                     list_models)
+ from .multimodal_retrieval import (ImageToTextRetrievalInferencer,
+                                    TextToImageRetrievalInferencer)
+ from .nlvr import NLVRInferencer
+ from .visual_grounding import VisualGroundingInferencer
+ from .visual_question_answering import VisualQuestionAnsweringInferencer
+
+ __all__ = [
+     'init_model', 'inference_model', 'list_models', 'get_model', 'ModelHub',
+     'ImageClassificationInferencer', 'ImageRetrievalInferencer',
+     'FeatureExtractor', 'ImageCaptionInferencer',
+     'TextToImageRetrievalInferencer', 'VisualGroundingInferencer',
+     'VisualQuestionAnsweringInferencer', 'ImageToTextRetrievalInferencer',
+     'BaseInferencer', 'NLVRInferencer'
+ ]
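
The re-exports above define the package's public inference API. A minimal usage sketch (not part of this commit, assuming mmpretrain and its bundled metafiles are installed; the docstrings below describe `pattern` as a wildcard match):

    # Sketch of the public API re-exported above (illustrative only).
    from mmpretrain.apis import get_model, list_models

    # List pre-defined model names matching a wildcard pattern.
    print(list_models('*resnet50*'))

    # Build one of the listed models without downloading weights.
    model = get_model('resnet50_8xb32_in1k', pretrained=False)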
mmpretrain/apis/base.py ADDED
@@ -0,0 +1,390 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from abc import abstractmethod
+ from math import ceil
+ from typing import Callable, Iterable, List, Optional, Tuple, Union
+
+ import numpy as np
+ import torch
+ from mmengine.config import Config
+ from mmengine.dataset import default_collate
+ from mmengine.fileio import get_file_backend
+ from mmengine.model import BaseModel
+ from mmengine.runner import load_checkpoint
+
+ from mmpretrain.structures import DataSample
+ from mmpretrain.utils import track
+ from .model import get_model, list_models
+
+ ModelType = Union[BaseModel, str, Config]
+ InputType = Union[str, np.ndarray, list]
+
+
+ class BaseInferencer:
+     """Base inferencer for various tasks.
+
+     The BaseInferencer provides the standard workflow for inference as follows:
+
+     1. Preprocess the input data by :meth:`preprocess`.
+     2. Forward the data to the model by :meth:`forward`. ``BaseInferencer``
+        assumes the model inherits from :class:`mmengine.models.BaseModel` and
+        will call `model.test_step` in :meth:`forward` by default.
+     3. Visualize the results by :meth:`visualize`.
+     4. Postprocess and return the results by :meth:`postprocess`.
+
+     When we call the subclasses inherited from BaseInferencer (not overriding
+     ``__call__``), the workflow will be executed in order.
+
+     All subclasses of BaseInferencer could define the following class
+     attributes for customization:
+
+     - ``preprocess_kwargs``: The keys of the kwargs that will be passed to
+       :meth:`preprocess`.
+     - ``forward_kwargs``: The keys of the kwargs that will be passed to
+       :meth:`forward`.
+     - ``visualize_kwargs``: The keys of the kwargs that will be passed to
+       :meth:`visualize`.
+     - ``postprocess_kwargs``: The keys of the kwargs that will be passed to
+       :meth:`postprocess`.
+
+     All attributes mentioned above should be a ``set`` of keys (strings),
+     and each key should not be duplicated. Actually, :meth:`__call__` will
+     dispatch all the arguments to the corresponding methods according to the
+     ``xxx_kwargs`` mentioned above.
+
+     Subclasses inherited from ``BaseInferencer`` should implement
+     :meth:`_init_pipeline`, :meth:`visualize` and :meth:`postprocess`:
+
+     - _init_pipeline: Return a callable object to preprocess the input data.
+     - visualize: Visualize the results returned by :meth:`forward`.
+     - postprocess: Postprocess the results returned by :meth:`forward` and
+       :meth:`visualize`.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``cls.list_models()`` and you can also query it in
+             :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only works if the ``model`` is a model name). Defaults to None.
+         device (str | torch.device | None): Transfer the model to the target
+             device. Defaults to None.
+         device_map (str | dict | None): A map that specifies where each
+             submodule should go. It doesn't need to be refined to each
+             parameter/buffer name; once a given module name is inside, every
+             submodule of it will be sent to the same device. You can use
+             `device_map="auto"` to automatically generate the device map.
+             Defaults to None.
+         offload_folder (str | None): If the `device_map` contains any value
+             `"disk"`, the folder where we will offload weights.
+         **kwargs: Other keyword arguments to initialize the model (only works
+             if the ``model`` is a model name).
+     """
+
+     preprocess_kwargs: set = set()
+     forward_kwargs: set = set()
+     visualize_kwargs: set = set()
+     postprocess_kwargs: set = set()
+
+     def __init__(self,
+                  model: ModelType,
+                  pretrained: Union[bool, str] = True,
+                  device: Union[str, torch.device, None] = None,
+                  device_map=None,
+                  offload_folder=None,
+                  **kwargs) -> None:
+
+         if isinstance(model, BaseModel):
+             if isinstance(pretrained, str):
+                 load_checkpoint(model, pretrained, map_location='cpu')
+             if device_map is not None:
+                 from .utils import dispatch_model
+                 model = dispatch_model(
+                     model,
+                     device_map=device_map,
+                     offload_folder=offload_folder)
+             elif device is not None:
+                 model.to(device)
+         else:
+             model = get_model(
+                 model,
+                 pretrained,
+                 device=device,
+                 device_map=device_map,
+                 offload_folder=offload_folder,
+                 **kwargs)
+
+         model.eval()
+
+         self.config = model._config
+         self.model = model
+         self.pipeline = self._init_pipeline(self.config)
+         self.visualizer = None
+
+     def __call__(
+         self,
+         inputs,
+         return_datasamples: bool = False,
+         batch_size: int = 1,
+         **kwargs,
+     ) -> dict:
+         """Call the inferencer.
+
+         Args:
+             inputs (InputsType): Inputs for the inferencer.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`BaseDataElement`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             **kwargs: Keyword arguments passed to :meth:`preprocess`,
+                 :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
+                 Each key in kwargs should be in the corresponding set of
+                 ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
+                 and ``postprocess_kwargs``.
+
+         Returns:
+             dict: Inference and visualization results.
+         """
+         (
+             preprocess_kwargs,
+             forward_kwargs,
+             visualize_kwargs,
+             postprocess_kwargs,
+         ) = self._dispatch_kwargs(**kwargs)
+
+         ori_inputs = self._inputs_to_list(inputs)
+         inputs = self.preprocess(
+             ori_inputs, batch_size=batch_size, **preprocess_kwargs)
+         preds = []
+         for data in track(
+                 inputs, 'Inference', total=ceil(len(ori_inputs) / batch_size)):
+             preds.extend(self.forward(data, **forward_kwargs))
+         visualization = self.visualize(ori_inputs, preds, **visualize_kwargs)
+         results = self.postprocess(preds, visualization, return_datasamples,
+                                    **postprocess_kwargs)
+         return results
+
+     def _inputs_to_list(self, inputs: InputType) -> list:
+         """Preprocess the inputs to a list.
+
+         Cast the input data to a list of data.
+
+         - list or tuple: return inputs
+         - str:
+             - Directory path: return all files in the directory
+             - other cases: return a list containing the string. The string
+               could be a path to a file, a URL or another type of string
+               according to the task.
+         - other: return a list with one item.
+
+         Args:
+             inputs (str | array | list): Inputs for the inferencer.
+
+         Returns:
+             list: List of inputs for :meth:`preprocess`.
+         """
+         if isinstance(inputs, str):
+             backend = get_file_backend(inputs)
+             if hasattr(backend, 'isdir') and backend.isdir(inputs):
+                 # Backends like HttpsBackend do not implement `isdir`, so only
+                 # those backends that implement `isdir` could accept the inputs
+                 # as a directory
+                 file_list = backend.list_dir_or_file(inputs, list_dir=False)
+                 inputs = [
+                     backend.join_path(inputs, file) for file in file_list
+                 ]
+
+         if not isinstance(inputs, (list, tuple)):
+             inputs = [inputs]
+
+         return list(inputs)
+
+     def preprocess(self, inputs: InputType, batch_size: int = 1, **kwargs):
+         """Process the inputs into a model-feedable format.
+
+         Customize your preprocess by overriding this method. Preprocess should
+         return an iterable object, of which each item will be used as the
+         input of ``model.test_step``.
+
+         ``BaseInferencer.preprocess`` will return an iterable of chunked data,
+         which will be used in ``__call__`` like this:
+
+         .. code-block:: python
+
+             def __call__(self, inputs, batch_size=1, **kwargs):
+                 chunked_data = self.preprocess(inputs, batch_size, **kwargs)
+                 for batch in chunked_data:
+                     preds = self.forward(batch, **kwargs)
+
+         Args:
+             inputs (InputsType): Inputs given by the user.
+             batch_size (int): Batch size. Defaults to 1.
+
+         Yields:
+             Any: Data processed by the ``pipeline`` and ``default_collate``.
+         """
+         chunked_data = self._get_chunk_data(
+             map(self.pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     @torch.no_grad()
+     def forward(self, inputs: Union[dict, tuple], **kwargs):
+         """Feed the inputs to the model."""
+         return self.model.test_step(inputs)
+
+     def visualize(self,
+                   inputs: list,
+                   preds: List[DataSample],
+                   show: bool = False,
+                   **kwargs) -> List[np.ndarray]:
+         """Visualize predictions.
+
+         Customize your visualization by overriding this method. visualize
+         should return visualization results, which could be np.ndarray or any
+         other objects.
+
+         Args:
+             inputs (list): Inputs preprocessed by :meth:`_inputs_to_list`.
+             preds (Any): Predictions of the model.
+             show (bool): Whether to display the image in a popup window.
+                 Defaults to False.
+
+         Returns:
+             List[np.ndarray]: Visualization results.
+         """
+         if show:
+             raise NotImplementedError(
+                 f'The `visualize` method of {self.__class__.__name__} '
+                 'is not implemented.')
+
+     @abstractmethod
+     def postprocess(
+         self,
+         preds: List[DataSample],
+         visualization: List[np.ndarray],
+         return_datasample=False,
+         **kwargs,
+     ) -> dict:
+         """Process the predictions and visualization results from ``forward``
+         and ``visualize``.
+
+         This method should be responsible for the following tasks:
+
+         1. Convert datasamples into a json-serializable dict if needed.
+         2. Pack the predictions and visualization results and return them.
+         3. Dump or log the predictions.
+
+         Customize your postprocess by overriding this method. Make sure
+         ``postprocess`` will return a dict with visualization results and
+         inference results.
+
+         Args:
+             preds (List[Dict]): Predictions of the model.
+             visualization (np.ndarray): Visualized predictions.
+             return_datasample (bool): Whether to return results as
+                 datasamples. Defaults to False.
+
+         Returns:
+             dict: Inference and visualization results with key ``predictions``
+             and ``visualization``.
+
+             - ``visualization`` (Any): Returned by :meth:`visualize`.
+             - ``predictions`` (dict or DataSample): Returned by
+               :meth:`forward` and processed in :meth:`postprocess`.
+               If ``return_datasample=False``, it usually should be a
+               json-serializable dict containing only basic data elements such
+               as strings and numbers.
+         """
+
+     @abstractmethod
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         """Initialize the test pipeline.
+
+         Return a pipeline to handle various input data, such as ``str`` or
+         ``np.ndarray``. It is an abstract method in BaseInferencer, and should
+         be implemented in subclasses.
+
+         The returned pipeline will be used to process a single piece of data.
+         It will be used in :meth:`preprocess` like this:
+
+         .. code-block:: python
+
+             def preprocess(self, inputs, batch_size, **kwargs):
+                 ...
+                 dataset = map(self.pipeline, dataset)
+                 ...
+         """
+
+     def _get_chunk_data(self, inputs: Iterable, chunk_size: int):
+         """Get batch data from the dataset.
+
+         Args:
+             inputs (Iterable): An iterable dataset.
+             chunk_size (int): Equivalent to batch size.
+
+         Yields:
+             list: batch data.
+         """
+         inputs_iter = iter(inputs)
+         while True:
+             try:
+                 chunk_data = []
+                 for _ in range(chunk_size):
+                     processed_data = next(inputs_iter)
+                     chunk_data.append(processed_data)
+                 yield chunk_data
+             except StopIteration:
+                 if chunk_data:
+                     yield chunk_data
+                 break
+
+     def _dispatch_kwargs(self, **kwargs) -> Tuple[dict, dict, dict, dict]:
+         """Dispatch kwargs to preprocess(), forward(), visualize() and
+         postprocess() according to the actual demands.
+
+         Returns:
+             Tuple[Dict, Dict, Dict, Dict]: kwargs passed to preprocess,
+             forward, visualize and postprocess respectively.
+         """
+         # Ensure each argument only matches one function
+         method_kwargs = self.preprocess_kwargs | self.forward_kwargs | \
+             self.visualize_kwargs | self.postprocess_kwargs
+
+         union_kwargs = method_kwargs | set(kwargs.keys())
+         if union_kwargs != method_kwargs:
+             unknown_kwargs = union_kwargs - method_kwargs
+             raise ValueError(
+                 f'unknown argument {unknown_kwargs} for `preprocess`, '
+                 '`forward`, `visualize` and `postprocess`')
+
+         preprocess_kwargs = {}
+         forward_kwargs = {}
+         visualize_kwargs = {}
+         postprocess_kwargs = {}
+
+         for key, value in kwargs.items():
+             if key in self.preprocess_kwargs:
+                 preprocess_kwargs[key] = value
+             if key in self.forward_kwargs:
+                 forward_kwargs[key] = value
+             if key in self.visualize_kwargs:
+                 visualize_kwargs[key] = value
+             if key in self.postprocess_kwargs:
+                 postprocess_kwargs[key] = value
+
+         return (
+             preprocess_kwargs,
+             forward_kwargs,
+             visualize_kwargs,
+             postprocess_kwargs,
+         )
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List models defined in the metafiles of the corresponding packages.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern)
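
The docstring above prescribes what a concrete inferencer must supply: `_init_pipeline` and `postprocess` are abstract, and any extra keyword is routed by `_dispatch_kwargs` through the `xxx_kwargs` class attributes. A minimal sketch of a subclass (not part of this commit; `MinimalInferencer` and `round_to` are invented for illustration, and it assumes a classification-style model whose DataSamples carry `pred_score`):

    # Illustrative sketch of the BaseInferencer contract described above.
    from typing import Callable, List

    import numpy as np
    from mmengine.config import Config
    from mmengine.dataset import Compose

    from mmpretrain.apis import BaseInferencer
    from mmpretrain.registry import TRANSFORMS
    from mmpretrain.structures import DataSample


    class MinimalInferencer(BaseInferencer):
        # Keys listed here are routed to `postprocess` by `_dispatch_kwargs`.
        postprocess_kwargs: set = {'round_to'}

        def _init_pipeline(self, cfg: Config) -> Callable:
            # Build the config's full test pipeline, wrapping a path string
            # into the dict that 'LoadImageFromFile' expects.
            pipeline = Compose([
                TRANSFORMS.build(t)
                for t in cfg.test_dataloader.dataset.pipeline
            ])
            return lambda img_path: pipeline(dict(img_path=img_path))

        def postprocess(self,
                        preds: List[DataSample],
                        visualization: List[np.ndarray],
                        return_datasample=False,
                        round_to: int = 4) -> list:
            if return_datasample:
                return preds
            return [{
                'scores': [round(float(s), round_to) for s in p.pred_score]
            } for p in preds]


    # `round_to` is dispatched to `postprocess`; an unknown keyword would
    # raise a ValueError in `_dispatch_kwargs`:
    # results = MinimalInferencer('resnet50_8xb32_in1k')('demo.jpg', round_to=2)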
mmpretrain/apis/feature_extractor.py ADDED
@@ -0,0 +1,128 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from typing import Callable, List, Optional, Union
+
+ import torch
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from .base import BaseInferencer, InputType
+ from .model import list_models
+
+
+ class FeatureExtractor(BaseInferencer):
+     """The inferencer for extracting features.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``FeatureExtractor.list_models()`` and you can also query it in
+             :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only works if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the
+             available device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only works
+             if the ``model`` is a model name).
+
+     Example:
+         >>> from mmpretrain import FeatureExtractor
+         >>> inferencer = FeatureExtractor('resnet50_8xb32_in1k', backbone=dict(out_indices=(0, 1, 2, 3)))
+         >>> feats = inferencer('demo/demo.JPEG', stage='backbone')[0]
+         >>> for feat in feats:
+         ...     print(feat.shape)
+         torch.Size([256, 56, 56])
+         torch.Size([512, 28, 28])
+         torch.Size([1024, 14, 14])
+         torch.Size([2048, 7, 7])
+     """  # noqa: E501
+
+     def __call__(self,
+                  inputs: InputType,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             inputs (str | array | list): The image path or array, or a list of
+                 images.
+             batch_size (int): Batch size. Defaults to 1.
+             **kwargs: Other keyword arguments accepted by the `extract_feat`
+                 method of the model.
+
+         Returns:
+             tensor | Tuple[tensor]: The extracted features.
+         """
+         ori_inputs = self._inputs_to_list(inputs)
+         inputs = self.preprocess(ori_inputs, batch_size=batch_size)
+         preds = []
+         for data in inputs:
+             preds.extend(self.forward(data, **kwargs))
+
+         return preds
+
+     @torch.no_grad()
+     def forward(self, inputs: Union[dict, tuple], **kwargs):
+         inputs = self.model.data_preprocessor(inputs, False)['inputs']
+         outputs = self.model.extract_feat(inputs, **kwargs)
+
+         def scatter(feats, index):
+             if isinstance(feats, torch.Tensor):
+                 return feats[index]
+             else:
+                 # Sequence of tensors
+                 return type(feats)([scatter(item, index) for item in feats])
+
+         results = []
+         for i in range(inputs.shape[0]):
+             results.append(scatter(outputs, i))
+
+         return results
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
+             # Image loading is finished in `self.preprocess`.
+             test_pipeline_cfg = test_pipeline_cfg[1:]
+         test_pipeline = Compose(
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg])
+         return test_pipeline
+
+     def preprocess(self, inputs: List[InputType], batch_size: int = 1):
+
+         def load_image(input_):
+             img = imread(input_)
+             if img is None:
+                 raise ValueError(f'Failed to read image {input_}.')
+             return dict(
+                 img=img,
+                 img_shape=img.shape[:2],
+                 ori_shape=img.shape[:2],
+             )
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def visualize(self):
+         raise NotImplementedError(
+             "The FeatureExtractor doesn't support visualization.")
+
+     def postprocess(self):
+         raise NotImplementedError(
+             "The FeatureExtractor doesn't need postprocessing.")
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern)
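
The recursive `scatter` helper in `forward` above is what lets the extractor return per-sample features even when `extract_feat` yields a tuple of stage tensors. A self-contained sketch of that logic on dummy tensors (illustrative, not part of the commit):

    # Same `scatter` logic as above, demonstrated on dummy data.
    import torch

    def scatter(feats, index):
        if isinstance(feats, torch.Tensor):
            return feats[index]
        # Sequence of tensors: rebuild the same container type per sample.
        return type(feats)([scatter(item, index) for item in feats])

    # With out_indices=(0, ..., 3), `extract_feat` returns a tuple of stage
    # features, each batched along dim 0 (batch size 2 here).
    batched = (torch.rand(2, 256, 56, 56), torch.rand(2, 2048, 7, 7))
    per_sample = scatter(batched, 0)
    print([t.shape for t in per_sample])
    # [torch.Size([256, 56, 56]), torch.Size([2048, 7, 7])]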
mmpretrain/apis/image_caption.py ADDED
@@ -0,0 +1,164 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from pathlib import Path
+ from typing import Callable, List, Optional
+
+ import numpy as np
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from .base import BaseInferencer, InputType
+ from .model import list_models
+
+
+ class ImageCaptionInferencer(BaseInferencer):
+     """The inferencer for image captioning.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``ImageCaptionInferencer.list_models()`` and you can also
+             query it in :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only works if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the
+             available device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only works
+             if the ``model`` is a model name).
+
+     Example:
+         >>> from mmpretrain import ImageCaptionInferencer
+         >>> inferencer = ImageCaptionInferencer('blip-base_3rdparty_caption')
+         >>> inferencer('demo/cat-dog.png')[0]
+         {'pred_caption': 'a puppy and a cat sitting on a blanket'}
+     """  # noqa: E501
+
+     visualize_kwargs: set = {'resize', 'show', 'show_dir', 'wait_time'}
+
+     def __call__(self,
+                  images: InputType,
+                  return_datasamples: bool = False,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             images (str | array | list): The image path or array, or a list of
+                 images.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`DataSample`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             resize (int, optional): Resize the short edge of the image to the
+                 specified length before visualization. Defaults to None.
+             draw_score (bool): Whether to draw the prediction scores
+                 of prediction categories. Defaults to True.
+             show (bool): Whether to display the visualization result in a
+                 window. Defaults to False.
+             wait_time (float): The display time (s). Defaults to 0, which
+                 means "forever".
+             show_dir (str, optional): If not None, save the visualization
+                 results in the specified directory. Defaults to None.
+
+         Returns:
+             list: The inference results.
+         """
+         return super().__call__(images, return_datasamples, batch_size,
+                                 **kwargs)
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
+             # Image loading is finished in `self.preprocess`.
+             test_pipeline_cfg = test_pipeline_cfg[1:]
+         test_pipeline = Compose(
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg])
+         return test_pipeline
+
+     def preprocess(self, inputs: List[InputType], batch_size: int = 1):
+
+         def load_image(input_):
+             img = imread(input_)
+             if img is None:
+                 raise ValueError(f'Failed to read image {input_}.')
+             return dict(
+                 img=img,
+                 img_shape=img.shape[:2],
+                 ori_shape=img.shape[:2],
+             )
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def visualize(self,
+                   ori_inputs: List[InputType],
+                   preds: List[DataSample],
+                   show: bool = False,
+                   wait_time: int = 0,
+                   resize: Optional[int] = None,
+                   show_dir=None):
+         if not show and show_dir is None:
+             return None
+
+         if self.visualizer is None:
+             from mmpretrain.visualization import UniversalVisualizer
+             self.visualizer = UniversalVisualizer()
+
+         visualization = []
+         for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
+             image = imread(input_)
+             if isinstance(input_, str):
+                 # The image loaded from path is in BGR format.
+                 image = image[..., ::-1]
+                 name = Path(input_).stem
+             else:
+                 name = str(i)
+
+             if show_dir is not None:
+                 show_dir = Path(show_dir)
+                 show_dir.mkdir(exist_ok=True)
+                 out_file = str((show_dir / name).with_suffix('.png'))
+             else:
+                 out_file = None
+
+             self.visualizer.visualize_image_caption(
+                 image,
+                 data_sample,
+                 resize=resize,
+                 show=show,
+                 wait_time=wait_time,
+                 name=name,
+                 out_file=out_file)
+             visualization.append(self.visualizer.get_image())
+             if show:
+                 self.visualizer.close()
+         return visualization
+
+     def postprocess(self,
+                     preds: List[DataSample],
+                     visualization: List[np.ndarray],
+                     return_datasamples=False) -> dict:
+         if return_datasamples:
+             return preds
+
+         results = []
+         for data_sample in preds:
+             results.append({'pred_caption': data_sample.get('pred_caption')})
+
+         return results
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern, task='Image Caption')
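
Beyond the docstring example, the `visualize` path above writes one PNG per input when `show_dir` is set, while the return value stays the list of caption dicts. A usage sketch (paths illustrative, not part of the commit):

    # Sketch: captioning with visualization output saved to disk.
    from mmpretrain.apis import ImageCaptionInferencer

    inferencer = ImageCaptionInferencer('blip-base_3rdparty_caption')

    # `show_dir` is dispatched to `visualize`, which saves '<stem>.png'
    # per input image under './vis/'.
    results = inferencer('demo/cat-dog.png', show_dir='./vis/')
    print(results[0]['pred_caption'])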
mmpretrain/apis/image_classification.py ADDED
@@ -0,0 +1,221 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from pathlib import Path
+ from typing import Callable, List, Optional, Union
+
+ import numpy as np
+ import torch
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from .base import BaseInferencer, InputType, ModelType
+ from .model import list_models
+
+
+ class ImageClassificationInferencer(BaseInferencer):
+     """The inferencer for image classification.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``ImageClassificationInferencer.list_models()`` and you can
+             also query it in :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only works if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the
+             available device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only works
+             if the ``model`` is a model name).
+
+     Example:
+         1. Use a pre-trained model in MMPreTrain to inference an image.
+
+         >>> from mmpretrain import ImageClassificationInferencer
+         >>> inferencer = ImageClassificationInferencer('resnet50_8xb32_in1k')
+         >>> inferencer('demo/demo.JPEG')
+         [{'pred_scores': array([...]),
+           'pred_label': 65,
+           'pred_score': 0.6649367809295654,
+           'pred_class': 'sea snake'}]
+
+         2. Use a config file and checkpoint to inference multiple images on
+            GPU, and save the visualization results in a folder.
+
+         >>> from mmpretrain import ImageClassificationInferencer
+         >>> inferencer = ImageClassificationInferencer(
+         ...     model='configs/resnet/resnet50_8xb32_in1k.py',
+         ...     pretrained='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
+         ...     device='cuda')
+         >>> inferencer(['demo/dog.jpg', 'demo/bird.JPEG'], show_dir="./visualize/")
+     """  # noqa: E501
+
+     visualize_kwargs: set = {
+         'resize', 'rescale_factor', 'draw_score', 'show', 'show_dir',
+         'wait_time'
+     }
+
+     def __init__(self,
+                  model: ModelType,
+                  pretrained: Union[bool, str] = True,
+                  device: Union[str, torch.device, None] = None,
+                  classes=None,
+                  **kwargs) -> None:
+         super().__init__(
+             model=model, pretrained=pretrained, device=device, **kwargs)
+
+         if classes is not None:
+             self.classes = classes
+         else:
+             self.classes = getattr(self.model, '_dataset_meta',
+                                    {}).get('classes')
+
+     def __call__(self,
+                  inputs: InputType,
+                  return_datasamples: bool = False,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             inputs (str | array | list): The image path or array, or a list of
+                 images.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`DataSample`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             resize (int, optional): Resize the short edge of the image to the
+                 specified length before visualization. Defaults to None.
+             rescale_factor (float, optional): Rescale the image by the rescale
+                 factor for visualization. This is helpful when the image is
+                 too large or too small for visualization. Defaults to None.
+             draw_score (bool): Whether to draw the prediction scores
+                 of prediction categories. Defaults to True.
+             show (bool): Whether to display the visualization result in a
+                 window. Defaults to False.
+             wait_time (float): The display time (s). Defaults to 0, which
+                 means "forever".
+             show_dir (str, optional): If not None, save the visualization
+                 results in the specified directory. Defaults to None.
+
+         Returns:
+             list: The inference results.
+         """
+         return super().__call__(
+             inputs,
+             return_datasamples=return_datasamples,
+             batch_size=batch_size,
+             **kwargs)
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
+             # Image loading is finished in `self.preprocess`.
+             test_pipeline_cfg = test_pipeline_cfg[1:]
+         test_pipeline = Compose(
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg])
+         return test_pipeline
+
+     def preprocess(self, inputs: List[InputType], batch_size: int = 1):
+
+         def load_image(input_):
+             img = imread(input_)
+             if img is None:
+                 raise ValueError(f'Failed to read image {input_}.')
+             return dict(
+                 img=img,
+                 img_shape=img.shape[:2],
+                 ori_shape=img.shape[:2],
+             )
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def visualize(self,
+                   ori_inputs: List[InputType],
+                   preds: List[DataSample],
+                   show: bool = False,
+                   wait_time: int = 0,
+                   resize: Optional[int] = None,
+                   rescale_factor: Optional[float] = None,
+                   draw_score=True,
+                   show_dir=None):
+         if not show and show_dir is None:
+             return None
+
+         if self.visualizer is None:
+             from mmpretrain.visualization import UniversalVisualizer
+             self.visualizer = UniversalVisualizer()
+
+         visualization = []
+         for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
+             image = imread(input_)
+             if isinstance(input_, str):
+                 # The image loaded from path is in BGR format.
+                 image = image[..., ::-1]
+                 name = Path(input_).stem
+             else:
+                 name = str(i)
+
+             if show_dir is not None:
+                 show_dir = Path(show_dir)
+                 show_dir.mkdir(exist_ok=True)
+                 out_file = str((show_dir / name).with_suffix('.png'))
+             else:
+                 out_file = None
+
+             self.visualizer.visualize_cls(
+                 image,
+                 data_sample,
+                 classes=self.classes,
+                 resize=resize,
+                 show=show,
+                 wait_time=wait_time,
+                 rescale_factor=rescale_factor,
+                 draw_gt=False,
+                 draw_pred=True,
+                 draw_score=draw_score,
+                 name=name,
+                 out_file=out_file)
+             visualization.append(self.visualizer.get_image())
+             if show:
+                 self.visualizer.close()
+         return visualization
+
+     def postprocess(self,
+                     preds: List[DataSample],
+                     visualization: List[np.ndarray],
+                     return_datasamples=False) -> dict:
+         if return_datasamples:
+             return preds
+
+         results = []
+         for data_sample in preds:
+             pred_scores = data_sample.pred_score
+             pred_score = float(torch.max(pred_scores).item())
+             pred_label = torch.argmax(pred_scores).item()
+             result = {
+                 'pred_scores': pred_scores.detach().cpu().numpy(),
+                 'pred_label': pred_label,
+                 'pred_score': pred_score,
+             }
+             if self.classes is not None:
+                 result['pred_class'] = self.classes[pred_label]
+             results.append(result)
+
+         return results
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern, task='Image Classification')
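
The `classes` argument this subclass adds in `__init__` overrides the checkpoint's `_dataset_meta`, and `postprocess` uses it to fill `pred_class`. A sketch of supplying class names for a custom checkpoint (config and weight paths are hypothetical, not part of the commit):

    # Sketch: overriding class names (paths hypothetical).
    from mmpretrain.apis import ImageClassificationInferencer

    inferencer = ImageClassificationInferencer(
        'configs/my_model.py',          # hypothetical config
        pretrained='work_dirs/my.pth',  # hypothetical checkpoint
        classes=['cat', 'dog'])         # used by postprocess for 'pred_class'

    result = inferencer('demo/cat-dog.png')[0]
    print(result['pred_label'], result.get('pred_class'))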
mmpretrain/apis/image_retrieval.py ADDED
@@ -0,0 +1,285 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from pathlib import Path
+ from typing import Callable, List, Optional, Union
+
+ import numpy as np
+ import torch
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import BaseDataset, Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from .base import BaseInferencer, InputType, ModelType
+ from .model import list_models
+
+
+ class ImageRetrievalInferencer(BaseInferencer):
+     """The inferencer for image-to-image retrieval.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``ImageRetrievalInferencer.list_models()`` and you can also
+             query it in :doc:`/modelzoo_statistics`.
+         prototype (str | list | dict | DataLoader | BaseDataset): The images
+             to be retrieved. It can be the following types:
+
+             - str: The directory of the images.
+             - list: A list of paths of the images.
+             - dict: A config dict of a prototype dataset.
+             - BaseDataset: A prototype dataset.
+             - DataLoader: A data loader to load the prototype data.
+
+         prototype_cache (str, optional): The path of the generated prototype
+             features. If it exists, directly load the cache instead of
+             re-generating the prototype features. If it does not exist, save
+             the generated features to this path. Defaults to None.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only works if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the
+             available device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only works
+             if the ``model`` is a model name).
+
+     Example:
+         >>> from mmpretrain import ImageRetrievalInferencer
+         >>> inferencer = ImageRetrievalInferencer(
+         ...     'resnet50-arcface_8xb32_inshop',
+         ...     prototype='./demo/',
+         ...     prototype_cache='img_retri.pth')
+         >>> inferencer('demo/cat-dog.png', topk=2)[0][1]
+         {'match_score': tensor(0.4088, device='cuda:0'),
+          'sample_idx': 3,
+          'sample': {'img_path': './demo/dog.jpg'}}
+     """  # noqa: E501
+
+     visualize_kwargs: set = {
+         'draw_score', 'resize', 'show_dir', 'show', 'wait_time', 'topk'
+     }
+     postprocess_kwargs: set = {'topk'}
+
+     def __init__(
+         self,
+         model: ModelType,
+         prototype,
+         prototype_cache=None,
+         prepare_batch_size=8,
+         pretrained: Union[bool, str] = True,
+         device: Union[str, torch.device, None] = None,
+         **kwargs,
+     ) -> None:
+         super().__init__(
+             model=model, pretrained=pretrained, device=device, **kwargs)
+
+         self.prototype_dataset = self._prepare_prototype(
+             prototype, prototype_cache, prepare_batch_size)
+
+     def _prepare_prototype(self, prototype, cache=None, batch_size=8):
+         from mmengine.dataset import DefaultSampler
+         from torch.utils.data import DataLoader
+
+         def build_dataloader(dataset):
+             return DataLoader(
+                 dataset,
+                 batch_size=batch_size,
+                 collate_fn=default_collate,
+                 sampler=DefaultSampler(dataset, shuffle=False),
+                 persistent_workers=False,
+             )
+
+         if isinstance(prototype, str):
+             # A directory path of images
+             prototype = dict(
+                 type='CustomDataset', with_label=False, data_root=prototype)
+
+         if isinstance(prototype, list):
+             test_pipeline = [dict(type='LoadImageFromFile'), self.pipeline]
+             dataset = BaseDataset(
+                 lazy_init=True, serialize_data=False, pipeline=test_pipeline)
+             dataset.data_list = [{
+                 'sample_idx': i,
+                 'img_path': file
+             } for i, file in enumerate(prototype)]
+             dataset._fully_initialized = True
+             dataloader = build_dataloader(dataset)
+         elif isinstance(prototype, dict):
+             # A config of a dataset
+             from mmpretrain.registry import DATASETS
+             test_pipeline = [dict(type='LoadImageFromFile'), self.pipeline]
+             dataset = DATASETS.build(prototype)
+             dataloader = build_dataloader(dataset)
+         elif isinstance(prototype, DataLoader):
+             dataset = prototype.dataset
+             dataloader = prototype
+         elif isinstance(prototype, BaseDataset):
+             dataset = prototype
+             dataloader = build_dataloader(dataset)
+         else:
+             raise TypeError(f'Unsupported prototype type {type(prototype)}.')
+
+         if cache is not None and Path(cache).exists():
+             self.model.prototype = cache
+         else:
+             self.model.prototype = dataloader
+         self.model.prepare_prototype()
+
+         from mmengine.logging import MMLogger
+         logger = MMLogger.get_current_instance()
+         if cache is None:
+             logger.info('The prototype has been prepared, you can use '
+                         '`save_prototype` to dump it into a pickle '
+                         'file for future usage.')
+         elif not Path(cache).exists():
+             self.save_prototype(cache)
+             logger.info(f'The prototype has been saved at {cache}.')
+
+         return dataset
+
+     def save_prototype(self, path):
+         self.model.dump_prototype(path)
+
+     def __call__(self,
+                  inputs: InputType,
+                  return_datasamples: bool = False,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             inputs (str | array | list): The image path or array, or a list of
+                 images.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`DataSample`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             resize (int, optional): Resize the long edge of the image to the
+                 specified length before visualization. Defaults to None.
+             draw_score (bool): Whether to draw the match scores.
+                 Defaults to True.
+             show (bool): Whether to display the visualization result in a
+                 window. Defaults to False.
+             wait_time (float): The display time (s). Defaults to 0, which
+                 means "forever".
+             show_dir (str, optional): If not None, save the visualization
+                 results in the specified directory. Defaults to None.
+
+         Returns:
+             list: The inference results.
+         """
+         return super().__call__(inputs, return_datasamples, batch_size,
+                                 **kwargs)
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
+             # Image loading is finished in `self.preprocess`.
+             test_pipeline_cfg = test_pipeline_cfg[1:]
+         test_pipeline = Compose(
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg])
+         return test_pipeline
+
+     def preprocess(self, inputs: List[InputType], batch_size: int = 1):
+
+         def load_image(input_):
+             img = imread(input_)
+             if img is None:
+                 raise ValueError(f'Failed to read image {input_}.')
+             return dict(
+                 img=img,
+                 img_shape=img.shape[:2],
+                 ori_shape=img.shape[:2],
+             )
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def visualize(self,
+                   ori_inputs: List[InputType],
+                   preds: List[DataSample],
+                   topk: int = 3,
+                   resize: Optional[int] = 224,
+                   show: bool = False,
+                   wait_time: int = 0,
+                   draw_score=True,
+                   show_dir=None):
+         if not show and show_dir is None:
+             return None
+
+         if self.visualizer is None:
+             from mmpretrain.visualization import UniversalVisualizer
+             self.visualizer = UniversalVisualizer()
+
+         visualization = []
+         for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
+             image = imread(input_)
+             if isinstance(input_, str):
+                 # The image loaded from path is in BGR format.
+                 image = image[..., ::-1]
+                 name = Path(input_).stem
+             else:
+                 name = str(i)
+
+             if show_dir is not None:
+                 show_dir = Path(show_dir)
+                 show_dir.mkdir(exist_ok=True)
+                 out_file = str((show_dir / name).with_suffix('.png'))
+             else:
+                 out_file = None
+
+             self.visualizer.visualize_image_retrieval(
+                 image,
+                 data_sample,
+                 self.prototype_dataset,
+                 topk=topk,
+                 resize=resize,
+                 draw_score=draw_score,
+                 show=show,
+                 wait_time=wait_time,
+                 name=name,
+                 out_file=out_file)
+             visualization.append(self.visualizer.get_image())
+             if show:
+                 self.visualizer.close()
+         return visualization
+
+     def postprocess(
+         self,
+         preds: List[DataSample],
+         visualization: List[np.ndarray],
+         return_datasamples=False,
+         topk=1,
+     ) -> dict:
+         if return_datasamples:
+             return preds
+
+         results = []
+         for data_sample in preds:
+             match_scores, indices = torch.topk(data_sample.pred_score, k=topk)
+             matches = []
+             for match_score, sample_idx in zip(match_scores, indices):
+                 sample = self.prototype_dataset.get_data_info(
+                     sample_idx.item())
+                 sample_idx = sample.pop('sample_idx')
+                 matches.append({
+                     'match_score': match_score,
+                     'sample_idx': sample_idx,
+                     'sample': sample
+                 })
+             results.append(matches)
+
+         return results
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern, task='Image Retrieval')
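
`_prepare_prototype` above also accepts a plain list of image paths, from which it builds an in-memory `BaseDataset`, and `prototype_cache` persists the computed gallery features across runs. A sketch with hypothetical file paths (not part of the commit):

    # Sketch: a list prototype instead of a directory (paths hypothetical).
    from mmpretrain.apis import ImageRetrievalInferencer

    gallery = ['gallery/0001.jpg', 'gallery/0002.jpg']  # hypothetical files
    inferencer = ImageRetrievalInferencer(
        'resnet50-arcface_8xb32_inshop',
        prototype=gallery,
        prototype_cache='gallery_feats.pth')  # saved on first run, reused after

    matches = inferencer('query.jpg', topk=1)[0]
    print(matches[0]['sample']['img_path'])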
mmpretrain/apis/model.py ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import fnmatch
4
+ import os.path as osp
5
+ import re
6
+ import warnings
7
+ from os import PathLike
8
+ from pathlib import Path
9
+ from typing import List, Tuple, Union
10
+
11
+ from mmengine.config import Config
12
+ from modelindex.load_model_index import load
13
+ from modelindex.models.Model import Model
14
+
15
+
16
+ class ModelHub:
17
+ """A hub to host the meta information of all pre-defined models."""
18
+ _models_dict = {}
19
+ __mmpretrain_registered = False
20
+
21
+ @classmethod
22
+ def register_model_index(cls,
23
+ model_index_path: Union[str, PathLike],
24
+ config_prefix: Union[str, PathLike, None] = None):
25
+ """Parse the model-index file and register all models.
26
+
27
+ Args:
28
+ model_index_path (str | PathLike): The path of the model-index
29
+ file.
30
+ config_prefix (str | PathLike | None): The prefix of all config
31
+ file paths in the model-index file.
32
+ """
33
+ model_index = load(str(model_index_path))
34
+ model_index.build_models_with_collections()
35
+
36
+ for metainfo in model_index.models:
37
+ model_name = metainfo.name.lower()
38
+ if metainfo.name in cls._models_dict:
39
+ raise ValueError(
40
+ 'The model name {} is conflict in {} and {}.'.format(
41
+ model_name, osp.abspath(metainfo.filepath),
42
+ osp.abspath(cls._models_dict[model_name].filepath)))
43
+ metainfo.config = cls._expand_config_path(metainfo, config_prefix)
44
+ cls._models_dict[model_name] = metainfo
45
+
46
+ @classmethod
47
+ def get(cls, model_name):
48
+ """Get the model's metainfo by the model name.
49
+
50
+ Args:
51
+ model_name (str): The name of model.
52
+
53
+ Returns:
54
+ modelindex.models.Model: The metainfo of the specified model.
55
+ """
56
+ cls._register_mmpretrain_models()
57
+ # lazy load config
58
+ metainfo = copy.deepcopy(cls._models_dict.get(model_name.lower()))
59
+ if metainfo is None:
60
+ raise ValueError(
61
+ f'Failed to find model "{model_name}". please use '
62
+ '`mmpretrain.list_models` to get all available names.')
63
+ if isinstance(metainfo.config, str):
64
+ metainfo.config = Config.fromfile(metainfo.config)
65
+ return metainfo
66
+
67
+ @staticmethod
68
+ def _expand_config_path(metainfo: Model,
69
+ config_prefix: Union[str, PathLike] = None):
70
+ if config_prefix is None:
71
+ config_prefix = osp.dirname(metainfo.filepath)
72
+
73
+ if metainfo.config is None or osp.isabs(metainfo.config):
74
+ config_path: str = metainfo.config
75
+ else:
76
+ config_path = osp.abspath(osp.join(config_prefix, metainfo.config))
77
+
78
+ return config_path
79
+
80
+ @classmethod
81
+ def _register_mmpretrain_models(cls):
82
+ # register models in mmpretrain
83
+ if not cls.__mmpretrain_registered:
84
+ from importlib_metadata import distribution
85
+ root = distribution('mmpretrain').locate_file('mmpretrain')
86
+ model_index_path = root / '.mim' / 'model-index.yml'
87
+ ModelHub.register_model_index(
88
+ model_index_path, config_prefix=root / '.mim')
89
+ cls.__mmpretrain_registered = True
90
+
91
+ @classmethod
92
+ def has(cls, model_name):
93
+ """Whether a model name is in the ModelHub."""
94
+ return model_name in cls._models_dict
95
+
96
+
97
+ def get_model(model: Union[str, Config],
+               pretrained: Union[str, bool] = False,
+               device=None,
+               device_map=None,
+               offload_folder=None,
+               url_mapping: Tuple[str, str] = None,
+               **kwargs):
+     """Get a pre-defined model or create a model from config.
+
+     Args:
+         model (str | Config): The name of the model, a config file path or a
+             config instance.
+         pretrained (bool | str): When using a model name to specify the model,
+             you can set ``True`` to load its pre-defined pretrained weights.
+             You can also use a string to specify the path or link of the
+             weights to load. Defaults to False.
+         device (str | torch.device | None): Transfer the model to the target
+             device. Defaults to None.
+         device_map (str | dict | None): A map that specifies where each
+             submodule should go. It doesn't need to be refined to each
+             parameter/buffer name; once a given module name is included, all
+             of its submodules will be sent to the same device. You can use
+             `device_map="auto"` to automatically generate the device map.
+             Defaults to None.
+         offload_folder (str | None): If the `device_map` contains the value
+             `"disk"`, the folder where the weights will be offloaded.
+         url_mapping (Tuple[str, str], optional): The mapping of the pretrained
+             checkpoint link. For example, load the checkpoint from a local
+             directory instead of downloading it by passing
+             ``('https://.*/', './checkpoint')``. Defaults to None.
+         **kwargs: Other keyword arguments of the model config.
+
+     Returns:
+         mmengine.model.BaseModel: The result model.
+
+     Examples:
+         Get a ResNet-50 model and extract image features:
+
+         >>> import torch
+         >>> from mmpretrain import get_model
+         >>> inputs = torch.rand(16, 3, 224, 224)
+         >>> model = get_model('resnet50_8xb32_in1k', pretrained=True, backbone=dict(out_indices=(0, 1, 2, 3)))
+         >>> feats = model.extract_feat(inputs)
+         >>> for feat in feats:
+         ...     print(feat.shape)
+         torch.Size([16, 256])
+         torch.Size([16, 512])
+         torch.Size([16, 1024])
+         torch.Size([16, 2048])
+
+         Get a Swin Transformer model with pre-trained weights and run inference:
+
+         >>> from mmpretrain import get_model, inference_model
+         >>> model = get_model('swin-base_16xb64_in1k', pretrained=True)
+         >>> result = inference_model(model, 'demo/demo.JPEG')
+         >>> print(result['pred_class'])
+         'sea snake'
+     """  # noqa: E501
+     if device_map is not None:
+         from .utils import dispatch_model
+         dispatch_model._verify_require()
+
+     metainfo = None
+     if isinstance(model, Config):
+         config = copy.deepcopy(model)
+         if pretrained is True and 'load_from' in config:
+             pretrained = config.load_from
+     elif isinstance(model, (str, PathLike)) and Path(model).suffix == '.py':
+         config = Config.fromfile(model)
+         if pretrained is True and 'load_from' in config:
+             pretrained = config.load_from
+     elif isinstance(model, str):
+         metainfo = ModelHub.get(model)
+         config = metainfo.config
+         if pretrained is True and metainfo.weights is not None:
+             pretrained = metainfo.weights
+     else:
+         raise TypeError('model must be a name, a path or a Config object, '
+                         f'but got {type(model)}')
+
+     if pretrained is True:
+         warnings.warn('Unable to find pre-defined checkpoint of the model.')
+         pretrained = None
+     elif pretrained is False:
+         pretrained = None
+
+     if kwargs:
+         config.merge_from_dict({'model': kwargs})
+     config.model.setdefault('data_preprocessor',
+                             config.get('data_preprocessor', None))
+
+     from mmengine.registry import DefaultScope
+
+     from mmpretrain.registry import MODELS
+     with DefaultScope.overwrite_default_scope('mmpretrain'):
+         model = MODELS.build(config.model)
+
+     dataset_meta = {}
+     if pretrained:
+         # Mapping the weights to GPU may cause an unexpected video memory
+         # leak; see https://github.com/open-mmlab/mmdetection/pull/6405
+         from mmengine.runner import load_checkpoint
+         if url_mapping is not None:
+             pretrained = re.sub(url_mapping[0], url_mapping[1], pretrained)
+         checkpoint = load_checkpoint(model, pretrained, map_location='cpu')
+         if 'dataset_meta' in checkpoint.get('meta', {}):
+             # mmpretrain 1.x
+             dataset_meta = checkpoint['meta']['dataset_meta']
+         elif 'CLASSES' in checkpoint.get('meta', {}):
+             # mmcls 0.x
+             dataset_meta = {'classes': checkpoint['meta']['CLASSES']}
+
+     if len(dataset_meta) == 0 and 'test_dataloader' in config:
+         from mmpretrain.registry import DATASETS
+         dataset_class = DATASETS.get(config.test_dataloader.dataset.type)
+         dataset_meta = getattr(dataset_class, 'METAINFO', {})
+
+     if device_map is not None:
+         model = dispatch_model(
+             model, device_map=device_map, offload_folder=offload_folder)
+     elif device is not None:
+         model.to(device)
+
+     model._dataset_meta = dataset_meta  # save the dataset meta
+     model._config = config  # save the config in the model
+     model._metainfo = metainfo  # save the metainfo in the model
+     model.eval()
+     return model
+
+
+ def init_model(config, checkpoint=None, device=None, **kwargs):
+     """Initialize a classifier from a config file (deprecated).
+
+     It's only for compatibility; please use :func:`get_model` instead.
+
+     Args:
+         config (str | :obj:`mmengine.Config`): Config file path or the config
+             object.
+         checkpoint (str, optional): Checkpoint path. If left as None, the model
+             will not load any weights.
+         device (str | torch.device | None): Transfer the model to the target
+             device. Defaults to None.
+         **kwargs: Other keyword arguments of the model config.
+
+     Returns:
+         nn.Module: The constructed model.
+     """
+     return get_model(config, checkpoint, device, **kwargs)
+
+
+ def list_models(pattern=None, exclude_patterns=None, task=None) -> List[str]:
+     """List all models available in MMPretrain.
+
+     Args:
+         pattern (str | None): A wildcard pattern to match model names.
+             Defaults to None.
+         exclude_patterns (list | None): A list of wildcard patterns to
+             exclude names from the matched names. Defaults to None.
+         task (str | None): The evaluation task of the model. Defaults to None.
+
+     Returns:
+         List[str]: a list of model names.
+
+     Examples:
+         List all models:
+
+         >>> from mmpretrain import list_models
+         >>> list_models()
+
+         List ResNet-50 models on the ImageNet-1k dataset:
+
+         >>> from mmpretrain import list_models
+         >>> list_models('resnet*in1k')
+         ['resnet50_8xb32_in1k',
+          'resnet50_8xb32-fp16_in1k',
+          'resnet50_8xb256-rsb-a1-600e_in1k',
+          'resnet50_8xb256-rsb-a2-300e_in1k',
+          'resnet50_8xb256-rsb-a3-100e_in1k']
+
+         List Swin Transformer models trained from scratch and exclude
+         Swin-Transformer-V2 models:
+
+         >>> from mmpretrain import list_models
+         >>> list_models('swin', exclude_patterns=['swinv2', '*-pre'])
+         ['swin-base_16xb64_in1k',
+          'swin-base_3rdparty_in1k',
+          'swin-base_3rdparty_in1k-384',
+          'swin-large_8xb8_cub-384px',
+          'swin-small_16xb64_in1k',
+          'swin-small_3rdparty_in1k',
+          'swin-tiny_16xb64_in1k',
+          'swin-tiny_3rdparty_in1k']
+
+         List all EVA models for the image classification task:
+
+         >>> from mmpretrain import list_models
+         >>> list_models('eva', task='Image Classification')
+         ['eva-g-p14_30m-in21k-pre_3rdparty_in1k-336px',
+          'eva-g-p14_30m-in21k-pre_3rdparty_in1k-560px',
+          'eva-l-p14_mim-in21k-pre_3rdparty_in1k-196px',
+          'eva-l-p14_mim-in21k-pre_3rdparty_in1k-336px',
+          'eva-l-p14_mim-pre_3rdparty_in1k-196px',
+          'eva-l-p14_mim-pre_3rdparty_in1k-336px']
+     """
+     ModelHub._register_mmpretrain_models()
+     matches = set(ModelHub._models_dict.keys())
+
+     if pattern is not None:
+         # Always match keys with any postfix.
+         matches = set(fnmatch.filter(matches, pattern + '*'))
+
+     exclude_patterns = exclude_patterns or []
+     for exclude_pattern in exclude_patterns:
+         exclude = set(fnmatch.filter(matches, exclude_pattern + '*'))
+         matches = matches - exclude
+
+     if task is not None:
+         task_matches = []
+         for key in matches:
+             metainfo = ModelHub._models_dict[key]
+             if metainfo.results is None and task == 'null':
+                 task_matches.append(key)
+             elif metainfo.results is None:
+                 continue
+             elif task in [result.task for result in metainfo.results]:
+                 task_matches.append(key)
+         matches = task_matches
+
+     return sorted(list(matches))
+
+
+ def inference_model(model, *args, **kwargs):
+     """Run inference on an image with an automatically selected inferencer.
+
+     The inferencer is selected according to the task of the model. This is a
+     shortcut for a quick start; for advanced usage, please use the
+     corresponding inferencer class.
+
+     Here is the mapping from task to inferencer:
+
+     - Image Classification: :class:`ImageClassificationInferencer`
+     - Image Retrieval: :class:`ImageRetrievalInferencer`
+     - Image Caption: :class:`ImageCaptionInferencer`
+     - Visual Question Answering: :class:`VisualQuestionAnsweringInferencer`
+     - Visual Grounding: :class:`VisualGroundingInferencer`
+     - Text-To-Image Retrieval: :class:`TextToImageRetrievalInferencer`
+     - Image-To-Text Retrieval: :class:`ImageToTextRetrievalInferencer`
+     - NLVR: :class:`NLVRInferencer`
+
+     Args:
+         model (BaseModel | str | Config): The loaded model, the model
+             name or the config of the model.
+         *args: Positional arguments to call the inferencer.
+         **kwargs: Other keyword arguments to initialize and call the
+             corresponding inferencer.
+
+     Returns:
+         result (dict): The inference results.
+     """  # noqa: E501
+     from mmengine.model import BaseModel
+
+     if isinstance(model, BaseModel):
+         metainfo = getattr(model, '_metainfo', None)
+     else:
+         metainfo = ModelHub.get(model)
+
+     from inspect import signature
+
+     from .image_caption import ImageCaptionInferencer
+     from .image_classification import ImageClassificationInferencer
+     from .image_retrieval import ImageRetrievalInferencer
+     from .multimodal_retrieval import (ImageToTextRetrievalInferencer,
+                                        TextToImageRetrievalInferencer)
+     from .nlvr import NLVRInferencer
+     from .visual_grounding import VisualGroundingInferencer
+     from .visual_question_answering import VisualQuestionAnsweringInferencer
+     task_mapping = {
+         'Image Classification': ImageClassificationInferencer,
+         'Image Retrieval': ImageRetrievalInferencer,
+         'Image Caption': ImageCaptionInferencer,
+         'Visual Question Answering': VisualQuestionAnsweringInferencer,
+         'Visual Grounding': VisualGroundingInferencer,
+         'Text-To-Image Retrieval': TextToImageRetrievalInferencer,
+         'Image-To-Text Retrieval': ImageToTextRetrievalInferencer,
+         'NLVR': NLVRInferencer,
+     }
+
+     inferencer_type = None
+
+     if metainfo is not None and metainfo.results is not None:
+         tasks = set(result.task for result in metainfo.results)
+         inferencer_type = [
+             task_mapping.get(task) for task in tasks if task in task_mapping
+         ]
+         if len(inferencer_type) > 1:
+             inferencer_names = [cls.__name__ for cls in inferencer_type]
+             warnings.warn('The model supports multiple tasks, auto select '
+                           f'{inferencer_names[0]}, you can also use other '
+                           f'inferencer {inferencer_names} directly.')
+         # Guard against models whose tasks have no matching inferencer.
+         inferencer_type = inferencer_type[0] if inferencer_type else None
+
+     if inferencer_type is None:
+         raise NotImplementedError('No available inferencer for the model')
+
+     init_kwargs = {
+         k: kwargs.pop(k)
+         for k in list(kwargs)
+         if k in signature(inferencer_type).parameters.keys()
+     }
+
+     inferencer = inferencer_type(model, **init_kwargs)
+     return inferencer(*args, **kwargs)[0]
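# Illustration (not part of the uploaded diff): a hedged end-to-end sketch of
# the three helpers above. The model name follows the docstring examples;
# 'demo/demo.JPEG' is a placeholder path and weights download on first use.
from mmpretrain import get_model, inference_model, list_models

print(list_models('resnet*in1k')[:3])                    # browse model names
model = get_model('resnet50_8xb32_in1k', pretrained=True, device='cpu')
print(inference_model(model, 'demo/demo.JPEG')['pred_class'])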
mmpretrain/apis/multimodal_retrieval.py ADDED
@@ -0,0 +1,603 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from copy import deepcopy
+ from pathlib import Path
+ from typing import Callable, List, Optional, Tuple, Union
+
+ import mmengine
+ import numpy as np
+ import torch
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import BaseDataset, Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from mmpretrain.utils import track
+ from .base import BaseInferencer
+ from .base import InputType as ImageType
+ from .base import ModelType
+ from .model import list_models
+
+
+ def filter_transforms(transforms: list, data_info: dict):
+     """Filter the pipeline to avoid KeyError with partial data info."""
+     data_info = deepcopy(data_info)
+     filtered_transforms = []
+     for t in transforms:
+         try:
+             data_info = t(data_info)
+             filtered_transforms.append(t)
+         except KeyError:
+             pass
+     return filtered_transforms
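# Illustration (not part of the uploaded diff): a minimal, self-contained
# sketch of `filter_transforms`. The two toy callables are hypothetical
# stand-ins for real pipeline transforms; any step that raises KeyError on
# the partial data info is dropped, which is how the inferencers below split
# one shared test pipeline into an image pipeline and a text pipeline.
def resize_img(info):
    info['img'] = info['img'][:224]  # raises KeyError if there is no image
    return info

def tokenize_text(info):
    info['tokens'] = info['text'].split()  # raises KeyError if there is no text
    return info

shared = [resize_img, tokenize_text]
img_pipeline = filter_transforms(shared, {'img': [[0, 0, 0]] * 256})
text_pipeline = filter_transforms(shared, {'text': 'a photo of a cat'})
assert img_pipeline == [resize_img] and text_pipeline == [tokenize_text]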
33
+
34
+
35
+ class TextToImageRetrievalInferencer(BaseInferencer):
36
+ """The inferencer for text to image retrieval.
37
+
38
+ Args:
39
+ model (BaseModel | str | Config): A model name or a path to the config
40
+ file, or a :obj:`BaseModel` object. The model name can be found
41
+ by ``TextToImageRetrievalInferencer.list_models()`` and you can also
42
+ query it in :doc:`/modelzoo_statistics`.
43
+ prototype (str | list | dict | DataLoader | BaseDataset): The images to
44
+ be retrieved. It can be the following types:
45
+
46
+ - str: The directory of the the images.
47
+ - list: A list of path of the images.
48
+ - dict: A config dict of the a prototype dataset.
49
+ - BaseDataset: A prototype dataset.
50
+ - DataLoader: A data loader to load the prototype data.
51
+
52
+ prototype_cache (str, optional): The path of the generated prototype
53
+ features. If exists, directly load the cache instead of re-generate
54
+ the prototype features. If not exists, save the generated features
55
+ to the path. Defaults to None.
56
+ fast_match (bool): Some algorithms will record extra image features for
57
+ further matching, which may consume large memory, set True to avoid
58
+ this behavior. Defaults to True.
59
+ pretrained (str, optional): Path to the checkpoint. If None, it will
60
+ try to find a pre-defined weight from the model you specified
61
+ (only work if the ``model`` is a model name). Defaults to None.
62
+ device (str, optional): Device to run inference. If None, the available
63
+ device will be automatically used. Defaults to None.
64
+ **kwargs: Other keyword arguments to initialize the model (only work if
65
+ the ``model`` is a model name).
66
+
67
+ Example:
68
+ >>> from mmpretrain import TextToImageRetrievalInferencer
69
+ >>> inferencer = TextToImageRetrievalInferencer(
70
+ ... 'blip-base_3rdparty_retrieval',
71
+ ... prototype='./demo/',
72
+ ... prototype_cache='t2i_retri.pth')
73
+ >>> inferencer('A cat and a dog.')[0]
74
+ {'match_score': tensor(0.3855, device='cuda:0'),
75
+ 'sample_idx': 1,
76
+ 'sample': {'img_path': './demo/cat-dog.png'}}
77
+ """ # noqa: E501
78
+
79
+ visualize_kwargs: set = {
80
+ 'draw_score', 'show_dir', 'show', 'wait_time', 'figsize', 'topk'
81
+ }
82
+ postprocess_kwargs: set = {'topk'}
83
+
84
+ def __init__(self,
85
+ model: ModelType,
86
+ prototype,
87
+ prototype_cache=None,
88
+ fast_match=True,
89
+ prepare_batch_size=8,
90
+ pretrained: Union[bool, str] = True,
91
+ device: Union[str, torch.device, None] = None,
92
+ **kwargs) -> None:
93
+ super().__init__(
94
+ model=model, pretrained=pretrained, device=device, **kwargs)
95
+
96
+ self.img_pipeline, self.text_pipeline = self.pipeline
97
+
98
+ if hasattr(self.model, 'fast_match'):
99
+ self.model.fast_match = fast_match
100
+
101
+ self.prototype_dataset = self._prepare_prototype(
102
+ prototype, prototype_cache, batch_size=prepare_batch_size)
103
+
104
+ def _prepare_prototype(self, prototype, cache=None, batch_size=8):
105
+ from mmengine.dataset import DefaultSampler
106
+ from torch.utils.data import DataLoader
107
+
108
+ def build_dataloader(dataset):
109
+ return DataLoader(
110
+ dataset,
111
+ batch_size=batch_size,
112
+ collate_fn=default_collate,
113
+ sampler=DefaultSampler(dataset, shuffle=False),
114
+ persistent_workers=False,
115
+ )
116
+
117
+ if isinstance(prototype, str):
118
+ # A directory path of images
119
+ prototype = dict(
120
+ type='CustomDataset', with_label=False, data_root=prototype)
121
+
122
+ if isinstance(prototype, list):
123
+ test_pipeline = [dict(type='LoadImageFromFile'), self.img_pipeline]
124
+ dataset = BaseDataset(
125
+ lazy_init=True, serialize_data=False, pipeline=test_pipeline)
126
+ dataset.data_list = [{
127
+ 'sample_idx': i,
128
+ 'img_path': file
129
+ } for i, file in enumerate(prototype)]
130
+ dataset._fully_initialized = True
131
+ dataloader = build_dataloader(dataset)
132
+ elif isinstance(prototype, dict):
133
+ # A config of dataset
134
+ from mmpretrain.registry import DATASETS
135
+ test_pipeline = [dict(type='LoadImageFromFile'), self.img_pipeline]
136
+ prototype.setdefault('pipeline', test_pipeline)
137
+ dataset = DATASETS.build(prototype)
138
+ dataloader = build_dataloader(dataset)
139
+ elif isinstance(prototype, list):
140
+ test_pipeline = [dict(type='LoadImageFromFile'), self.img_pipeline]
141
+ dataset = BaseDataset(
142
+ lazy_init=True, serialize_data=False, pipeline=test_pipeline)
143
+ dataset.data_list = [{
144
+ 'sample_idx': i,
145
+ 'img_path': file
146
+ } for i, file in enumerate(prototype)]
147
+ dataset._fully_initialized = True
148
+ dataloader = build_dataloader(dataset)
149
+ elif isinstance(prototype, DataLoader):
150
+ dataset = prototype.dataset
151
+ dataloader = prototype
152
+ elif isinstance(prototype, BaseDataset):
153
+ dataset = prototype
154
+ dataloader = build_dataloader(dataset)
155
+ else:
156
+ raise TypeError(f'Unsupported prototype type {type(prototype)}.')
157
+
158
+ if cache is not None and Path(cache).exists():
159
+ self.prototype = torch.load(cache)
160
+ else:
161
+ prototype = []
162
+ for data_batch in track(dataloader, 'Prepare prototype...'):
163
+ with torch.no_grad():
164
+ data_batch = self.model.data_preprocessor(
165
+ data_batch, False)
166
+ feats = self.model._run_forward(data_batch, mode='tensor')
167
+ prototype.append(feats)
168
+ prototype = {
169
+ k: torch.cat([d[k] for d in prototype])
170
+ for k in prototype[0]
171
+ }
172
+ self.prototype = prototype
173
+
174
+ from mmengine.logging import MMLogger
175
+ logger = MMLogger.get_current_instance()
176
+ if cache is None:
177
+ logger.info('The prototype has been prepared, you can use '
178
+ '`save_prototype` to dump it into a pickle '
179
+ 'file for the future usage.')
180
+ elif not Path(cache).exists():
181
+ self.save_prototype(cache)
182
+ logger.info(f'The prototype has been saved at {cache}.')
183
+
184
+ return dataset
185
+
186
+ def save_prototype(self, path):
187
+ torch.save(self.prototype, path)
188
+
189
+ def __call__(self,
190
+ inputs: ImageType,
191
+ return_datasamples: bool = False,
192
+ batch_size: int = 1,
193
+ **kwargs) -> dict:
194
+ """Call the inferencer.
195
+
196
+ Args:
197
+ inputs (str | array | list): The image path or array, or a list of
198
+ images.
199
+ return_datasamples (bool): Whether to return results as
200
+ :obj:`DataSample`. Defaults to False.
201
+ batch_size (int): Batch size. Defaults to 1.
202
+ resize (int, optional): Resize the long edge of the image to the
203
+ specified length before visualization. Defaults to None.
204
+ draw_score (bool): Whether to draw the match scores.
205
+ Defaults to True.
206
+ show (bool): Whether to display the visualization result in a
207
+ window. Defaults to False.
208
+ wait_time (float): The display time (s). Defaults to 0, which means
209
+ "forever".
210
+ show_dir (str, optional): If not None, save the visualization
211
+ results in the specified directory. Defaults to None.
212
+
213
+ Returns:
214
+ list: The inference results.
215
+ """
216
+ return super().__call__(inputs, return_datasamples, batch_size,
217
+ **kwargs)
218
+
219
+ @torch.no_grad()
220
+ def forward(self, data: dict, **kwargs):
221
+ """Feed the inputs to the model."""
222
+ data = self.model.data_preprocessor(data, False)
223
+ data_samples = data['data_samples']
224
+ feats = self.prototype.copy()
225
+ feats.update(self.model.extract_feat(data_samples=data_samples))
226
+ return self.model.predict_all(feats, data_samples, cal_i2t=False)[0]
227
+
228
+ def _init_pipeline(self, cfg: Config) -> Callable:
229
+ test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
230
+ test_transfroms = [TRANSFORMS.build(t) for t in test_pipeline_cfg]
231
+ img_info = {'img': np.zeros((224, 224, 3), dtype=np.uint8)}
232
+ text_info = {'text': 'example'}
233
+ img_pipeline = Compose(filter_transforms(test_transfroms, img_info))
234
+ text_pipeline = Compose(filter_transforms(test_transfroms, text_info))
235
+ return img_pipeline, text_pipeline
236
+
237
+ def preprocess(self, inputs: List[str], batch_size: int = 1):
238
+
239
+ def process_text(input_: str):
240
+ return self.text_pipeline({'text': input_})
241
+
242
+ chunked_data = self._get_chunk_data(
243
+ map(process_text, inputs), batch_size)
244
+ yield from map(default_collate, chunked_data)
245
+
246
+ def visualize(self,
247
+ ori_inputs: List[str],
248
+ preds: List[DataSample],
249
+ topk: int = 3,
250
+ figsize: Tuple[int, int] = (16, 9),
251
+ show: bool = False,
252
+ wait_time: int = 0,
253
+ draw_score=True,
254
+ show_dir=None):
255
+ if not show and show_dir is None:
256
+ return None
257
+
258
+ if self.visualizer is None:
259
+ from mmpretrain.visualization import UniversalVisualizer
260
+ self.visualizer = UniversalVisualizer()
261
+
262
+ visualization = []
263
+ for i, (text, data_sample) in enumerate(zip(ori_inputs, preds)):
264
+ name = str(i)
265
+
266
+ if show_dir is not None:
267
+ show_dir = Path(show_dir)
268
+ show_dir.mkdir(exist_ok=True)
269
+ out_file = str((show_dir / name).with_suffix('.png'))
270
+ else:
271
+ out_file = None
272
+
273
+ self.visualizer.visualize_t2i_retrieval(
274
+ text,
275
+ data_sample,
276
+ self.prototype_dataset,
277
+ topk=topk,
278
+ fig_cfg=dict(figsize=figsize),
279
+ draw_score=draw_score,
280
+ show=show,
281
+ wait_time=wait_time,
282
+ name=name,
283
+ out_file=out_file)
284
+ visualization.append(self.visualizer.get_image())
285
+ if show:
286
+ self.visualizer.close()
287
+ return visualization
288
+
289
+ def postprocess(
290
+ self,
291
+ preds: List[DataSample],
292
+ visualization: List[np.ndarray],
293
+ return_datasamples=False,
294
+ topk=1,
295
+ ) -> dict:
296
+ if return_datasamples:
297
+ return preds
298
+
299
+ results = []
300
+ for data_sample in preds:
301
+ match_scores, indices = torch.topk(data_sample.pred_score, k=topk)
302
+ matches = []
303
+ for match_score, sample_idx in zip(match_scores, indices):
304
+ sample = self.prototype_dataset.get_data_info(
305
+ sample_idx.item())
306
+ sample_idx = sample.pop('sample_idx')
307
+ matches.append({
308
+ 'match_score': match_score,
309
+ 'sample_idx': sample_idx,
310
+ 'sample': sample
311
+ })
312
+ results.append(matches)
313
+
314
+ return results
315
+
316
+ @staticmethod
317
+ def list_models(pattern: Optional[str] = None):
318
+ """List all available model names.
319
+
320
+ Args:
321
+ pattern (str | None): A wildcard pattern to match model names.
322
+
323
+ Returns:
324
+ List[str]: a list of model names.
325
+ """
326
+ return list_models(pattern=pattern, task='Text-To-Image Retrieval')
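# Illustration (not part of the uploaded diff): a hedged usage sketch of the
# text-to-image inferencer above, following its docstring example. './demo/'
# is a placeholder image folder; the prototype features are written to the
# cache file on the first run and reloaded on later runs.
from mmpretrain import TextToImageRetrievalInferencer

inferencer = TextToImageRetrievalInferencer(
    'blip-base_3rdparty_retrieval',
    prototype='./demo/',
    prototype_cache='t2i_retri.pth')
for match in inferencer('A cat and a dog.', topk=3)[0]:
    print(match['match_score'].item(), match['sample']['img_path'])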
327
+
328
+
329
+ class ImageToTextRetrievalInferencer(BaseInferencer):
330
+ """The inferencer for image to text retrieval.
331
+
332
+ Args:
333
+ model (BaseModel | str | Config): A model name or a path to the config
334
+ file, or a :obj:`BaseModel` object. The model name can be found
335
+ by ``ImageToTextRetrievalInferencer.list_models()`` and you can
336
+ also query it in :doc:`/modelzoo_statistics`.
337
+ prototype (str | list | dict | DataLoader, BaseDataset): The images to
338
+ be retrieved. It can be the following types:
339
+
340
+ - str: The file path to load the string list.
341
+ - list: A list of string.
342
+
343
+ prototype_cache (str, optional): The path of the generated prototype
344
+ features. If exists, directly load the cache instead of re-generate
345
+ the prototype features. If not exists, save the generated features
346
+ to the path. Defaults to None.
347
+ fast_match (bool): Some algorithms will record extra image features for
348
+ further matching, which may consume large memory, set True to avoid
349
+ this behavior. Defaults to True.
350
+ pretrained (str, optional): Path to the checkpoint. If None, it will
351
+ try to find a pre-defined weight from the model you specified
352
+ (only work if the ``model`` is a model name). Defaults to None.
353
+ device (str, optional): Device to run inference. If None, the available
354
+ device will be automatically used. Defaults to None.
355
+ **kwargs: Other keyword arguments to initialize the model (only work if
356
+ the ``model`` is a model name).
357
+
358
+ Example:
359
+ >>> from mmpretrain import ImageToTextRetrievalInferencer
360
+ >>> inferencer = ImageToTextRetrievalInferencer(
361
+ ... 'blip-base_3rdparty_retrieval',
362
+ ... prototype=['cat', 'dog', 'snake', 'bird'],
363
+ ... prototype_cache='i2t_retri.pth')
364
+ >>> inferencer('demo/bird.JPEG')[0]
365
+ {'match_score': tensor(0.3855, device='cuda:0'),
366
+ 'sample_idx': 1,
367
+ 'sample': {'img_path': './demo/cat-dog.png'}}
368
+ """ # noqa: E501
369
+
370
+ visualize_kwargs: set = {
371
+ 'draw_score', 'resize', 'show_dir', 'show', 'wait_time', 'topk'
372
+ }
373
+ postprocess_kwargs: set = {'topk'}
374
+
375
+ def __init__(self,
376
+ model: ModelType,
377
+ prototype,
378
+ prototype_cache=None,
379
+ fast_match=True,
380
+ prepare_batch_size=8,
381
+ pretrained: Union[bool, str] = True,
382
+ device: Union[str, torch.device, None] = None,
383
+ **kwargs) -> None:
384
+ super().__init__(
385
+ model=model, pretrained=pretrained, device=device, **kwargs)
386
+
387
+ self.img_pipeline, self.text_pipeline = self.pipeline
388
+
389
+ if hasattr(self.model, 'fast_match'):
390
+ self.model.fast_match = fast_match
391
+
392
+ self.prototype_dataset = self._prepare_prototype(
393
+ prototype, cache=prototype_cache, batch_size=prepare_batch_size)
394
+
395
+ def _prepare_prototype(self, prototype, cache=None, batch_size=8):
396
+ from mmengine.dataset import DefaultSampler
397
+ from torch.utils.data import DataLoader
398
+
399
+ def build_dataloader(dataset):
400
+ return DataLoader(
401
+ [
402
+ self.text_pipeline({
403
+ 'sample_idx': i,
404
+ 'text': text
405
+ }) for i, text in enumerate(dataset)
406
+ ],
407
+ batch_size=batch_size,
408
+ collate_fn=default_collate,
409
+ sampler=DefaultSampler(dataset, shuffle=False),
410
+ persistent_workers=False,
411
+ )
412
+
413
+ if isinstance(prototype, str):
414
+ # A file path of a list of string
415
+ dataset = mmengine.list_from_file(prototype)
416
+ elif mmengine.utils.is_seq_of(prototype, str):
417
+ dataset = prototype
418
+ else:
419
+ raise TypeError(f'Unsupported prototype type {type(prototype)}.')
420
+
421
+ dataloader = build_dataloader(dataset)
422
+
423
+ if cache is not None and Path(cache).exists():
424
+ self.prototype = torch.load(cache)
425
+ else:
426
+ prototype = []
427
+ for data_batch in track(dataloader, 'Prepare prototype...'):
428
+ with torch.no_grad():
429
+ data_batch = self.model.data_preprocessor(
430
+ data_batch, False)
431
+ feats = self.model._run_forward(data_batch, mode='tensor')
432
+ prototype.append(feats)
433
+ prototype = {
434
+ k: torch.cat([d[k] for d in prototype])
435
+ for k in prototype[0]
436
+ }
437
+ self.prototype = prototype
438
+
439
+ from mmengine.logging import MMLogger
440
+ logger = MMLogger.get_current_instance()
441
+ if cache is None:
442
+ logger.info('The prototype has been prepared, you can use '
443
+ '`save_prototype` to dump it into a pickle '
444
+ 'file for the future usage.')
445
+ elif not Path(cache).exists():
446
+ self.save_prototype(cache)
447
+ logger.info(f'The prototype has been saved at {cache}.')
448
+
449
+ return dataset
450
+
451
+ def save_prototype(self, path):
452
+ torch.save(self.prototype, path)
453
+
454
+ def __call__(self,
455
+ inputs: ImageType,
456
+ return_datasamples: bool = False,
457
+ batch_size: int = 1,
458
+ **kwargs) -> dict:
459
+ """Call the inferencer.
460
+
461
+ Args:
462
+ inputs (str | array | list): The image path or array, or a list of
463
+ images.
464
+ return_datasamples (bool): Whether to return results as
465
+ :obj:`DataSample`. Defaults to False.
466
+ batch_size (int): Batch size. Defaults to 1.
467
+ resize (int, optional): Resize the long edge of the image to the
468
+ specified length before visualization. Defaults to None.
469
+ draw_score (bool): Whether to draw the match scores.
470
+ Defaults to True.
471
+ show (bool): Whether to display the visualization result in a
472
+ window. Defaults to False.
473
+ wait_time (float): The display time (s). Defaults to 0, which means
474
+ "forever".
475
+ show_dir (str, optional): If not None, save the visualization
476
+ results in the specified directory. Defaults to None.
477
+
478
+ Returns:
479
+ list: The inference results.
480
+ """
481
+ return super().__call__(inputs, return_datasamples, batch_size,
482
+ **kwargs)
483
+
484
+ @torch.no_grad()
485
+ def forward(self, data: dict, **kwargs):
486
+ """Feed the inputs to the model."""
487
+ data = self.model.data_preprocessor(data, False)
488
+ feats = self.prototype.copy()
489
+ feats.update(self.model.extract_feat(images=data['images']))
490
+ return self.model.predict_all(
491
+ feats, data['data_samples'], cal_t2i=False)[0]
492
+
493
+ def _init_pipeline(self, cfg: Config) -> Callable:
494
+ test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
495
+ test_transfroms = [TRANSFORMS.build(t) for t in test_pipeline_cfg]
496
+ img_info = {'img': np.zeros((224, 224, 3), dtype=np.uint8)}
497
+ text_info = {'text': 'example'}
498
+ img_pipeline = Compose(filter_transforms(test_transfroms, img_info))
499
+ text_pipeline = Compose(filter_transforms(test_transfroms, text_info))
500
+ return img_pipeline, text_pipeline
501
+
502
+ def preprocess(self, inputs: List[ImageType], batch_size: int = 1):
503
+
504
+ def load_image(input_):
505
+ img = imread(input_)
506
+ if img is None:
507
+ raise ValueError(f'Failed to read image {input_}.')
508
+ return dict(
509
+ img=img,
510
+ img_shape=img.shape[:2],
511
+ ori_shape=img.shape[:2],
512
+ )
513
+
514
+ pipeline = Compose([load_image, self.img_pipeline])
515
+
516
+ chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
517
+ yield from map(default_collate, chunked_data)
518
+
519
+ def visualize(self,
520
+ ori_inputs: List[ImageType],
521
+ preds: List[DataSample],
522
+ topk: int = 3,
523
+ resize: Optional[int] = 224,
524
+ show: bool = False,
525
+ wait_time: int = 0,
526
+ draw_score=True,
527
+ show_dir=None):
528
+ if not show and show_dir is None:
529
+ return None
530
+
531
+ if self.visualizer is None:
532
+ from mmpretrain.visualization import UniversalVisualizer
533
+ self.visualizer = UniversalVisualizer()
534
+
535
+ visualization = []
536
+ for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
537
+ image = imread(input_)
538
+ if isinstance(input_, str):
539
+ # The image loaded from path is BGR format.
540
+ image = image[..., ::-1]
541
+ name = Path(input_).stem
542
+ else:
543
+ name = str(i)
544
+
545
+ if show_dir is not None:
546
+ show_dir = Path(show_dir)
547
+ show_dir.mkdir(exist_ok=True)
548
+ out_file = str((show_dir / name).with_suffix('.png'))
549
+ else:
550
+ out_file = None
551
+
552
+ self.visualizer.visualize_i2t_retrieval(
553
+ image,
554
+ data_sample,
555
+ self.prototype_dataset,
556
+ topk=topk,
557
+ resize=resize,
558
+ draw_score=draw_score,
559
+ show=show,
560
+ wait_time=wait_time,
561
+ name=name,
562
+ out_file=out_file)
563
+ visualization.append(self.visualizer.get_image())
564
+ if show:
565
+ self.visualizer.close()
566
+ return visualization
567
+
568
+ def postprocess(
569
+ self,
570
+ preds: List[DataSample],
571
+ visualization: List[np.ndarray],
572
+ return_datasamples=False,
573
+ topk=1,
574
+ ) -> dict:
575
+ if return_datasamples:
576
+ return preds
577
+
578
+ results = []
579
+ for data_sample in preds:
580
+ match_scores, indices = torch.topk(data_sample.pred_score, k=topk)
581
+ matches = []
582
+ for match_score, sample_idx in zip(match_scores, indices):
583
+ text = self.prototype_dataset[sample_idx.item()]
584
+ matches.append({
585
+ 'match_score': match_score,
586
+ 'sample_idx': sample_idx,
587
+ 'text': text
588
+ })
589
+ results.append(matches)
590
+
591
+ return results
592
+
593
+ @staticmethod
594
+ def list_models(pattern: Optional[str] = None):
595
+ """List all available model names.
596
+
597
+ Args:
598
+ pattern (str | None): A wildcard pattern to match model names.
599
+
600
+ Returns:
601
+ List[str]: a list of model names.
602
+ """
603
+ return list_models(pattern=pattern, task='Image-To-Text Retrieval')
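# Illustration (not part of the uploaded diff): a hedged usage sketch of the
# image-to-text inferencer above, following its docstring example; the image
# path is a placeholder.
from mmpretrain import ImageToTextRetrievalInferencer

inferencer = ImageToTextRetrievalInferencer(
    'blip-base_3rdparty_retrieval',
    prototype=['cat', 'dog', 'snake', 'bird'])
for match in inferencer('demo/bird.JPEG', topk=2)[0]:
    print(match['match_score'].item(), match['text'])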
mmpretrain/apis/nlvr.py ADDED
@@ -0,0 +1,150 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from copy import deepcopy
+ from typing import Callable, List, Optional, Tuple, Union
+
+ import numpy as np
+ import torch
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from .base import BaseInferencer
+ from .model import list_models
+
+ InputType = Tuple[Union[str, np.ndarray], Union[str, np.ndarray], str]
+ InputsType = Union[List[InputType], InputType]
+
+
+ class NLVRInferencer(BaseInferencer):
+     """The inferencer for Natural Language for Visual Reasoning.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``NLVRInferencer.list_models()`` and you can also
+             query it in :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only work if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the available
+             device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only work if
+             the ``model`` is a model name).
+     """
+
+     visualize_kwargs: set = {
+         'resize', 'draw_score', 'show', 'show_dir', 'wait_time'
+     }
+
+     def __call__(self,
+                  inputs: InputsType,
+                  return_datasamples: bool = False,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             inputs (tuple, List[tuple]): The input data tuples, every tuple
+                 should include three items (left image, right image, text).
+                 The image can be a path or numpy array.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`DataSample`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             resize (int, optional): Resize the short edge of the image to the
+                 specified length before visualization. Defaults to None.
+             draw_score (bool): Whether to draw the prediction scores
+                 of prediction categories. Defaults to True.
+             show (bool): Whether to display the visualization result in a
+                 window. Defaults to False.
+             wait_time (float): The display time (s). Defaults to 0, which means
+                 "forever".
+             show_dir (str, optional): If not None, save the visualization
+                 results in the specified directory. Defaults to None.
+
+         Returns:
+             list: The inference results.
+         """
+         assert isinstance(inputs, (tuple, list))
+         if isinstance(inputs, tuple):
+             inputs = [inputs]
+         for input_ in inputs:
+             assert isinstance(input_, tuple)
+             assert len(input_) == 3
+
+         return super().__call__(
+             inputs,
+             return_datasamples=return_datasamples,
+             batch_size=batch_size,
+             **kwargs)
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         assert test_pipeline_cfg[0]['type'] == 'ApplyToList'
+
+         list_pipeline = deepcopy(test_pipeline_cfg[0])
+         if list_pipeline.scatter_key == 'img_path':
+             # Remove `LoadImageFromFile`
+             list_pipeline.transforms.pop(0)
+             list_pipeline.scatter_key = 'img'
+
+         test_pipeline = Compose(
+             [TRANSFORMS.build(list_pipeline)] +
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg[1:]])
+         return test_pipeline
+
+     def preprocess(self, inputs: InputsType, batch_size: int = 1):
+
+         def load_image(input_):
+             img1 = imread(input_[0])
+             img2 = imread(input_[1])
+             text = input_[2]
+             if img1 is None:
+                 raise ValueError(f'Failed to read image {input_[0]}.')
+             if img2 is None:
+                 raise ValueError(f'Failed to read image {input_[1]}.')
+             return dict(
+                 img=[img1, img2],
+                 img_shape=[img1.shape[:2], img2.shape[:2]],
+                 ori_shape=[img1.shape[:2], img2.shape[:2]],
+                 text=text,
+             )
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def postprocess(self,
+                     preds: List[DataSample],
+                     visualization: List[np.ndarray],
+                     return_datasamples=False) -> dict:
+         if return_datasamples:
+             return preds
+
+         results = []
+         for data_sample in preds:
+             pred_scores = data_sample.pred_score
+             pred_score = float(torch.max(pred_scores).item())
+             pred_label = torch.argmax(pred_scores).item()
+             result = {
+                 'pred_scores': pred_scores.detach().cpu().numpy(),
+                 'pred_label': pred_label,
+                 'pred_score': pred_score,
+             }
+             results.append(result)
+
+         return results
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern, task='NLVR')
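# Illustration (not part of the uploaded diff): a hedged sketch of the NLVR
# inferencer above. The image paths and the statement are placeholders; pick
# a concrete model name from NLVRInferencer.list_models().
from mmpretrain import NLVRInferencer

inferencer = NLVRInferencer(NLVRInferencer.list_models()[0])
result = inferencer(('demo/left.png', 'demo/right.png',
                     'There is a dog in at least one of the images.'))[0]
print(result['pred_label'], result['pred_score'])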
mmpretrain/apis/utils.py ADDED
@@ -0,0 +1,270 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import os
+ from collections import defaultdict
+ from contextlib import contextmanager
+ from itertools import chain
+ from typing import Dict, List, Optional, Union
+
+ import torch
+ import torch.nn as nn
+
+ from mmpretrain.utils import require
+
+
+ @require('torch>=1.9.0', 'https://pytorch.org/get-started/locally/')
+ @require('accelerate')
+ def dispatch_model(
+     model,
+     device_map: Union[str, dict],
+     max_memory: Optional[dict] = None,
+     no_split_module_classes: Optional[List[str]] = None,
+     offload_folder: str = None,
+     offload_buffers: bool = False,
+     preload_module_classes: Optional[List[str]] = None,
+ ):
+     """Split and dispatch a model across devices.
+
+     This function depends on the `accelerate` package. Refer to
+     https://huggingface.co/docs/accelerate/main/en/usage_guides/big_modeling
+
+     Args:
+         model (torch.nn.Module): The model to dispatch.
+         device_map (str | dict | None): A map that specifies where each
+             submodule should go. It doesn't need to be refined to each
+             parameter/buffer name; once a given module name is included, all
+             of its submodules will be sent to the same device. You can use
+             `device_map="auto"` to automatically generate the device map.
+             Defaults to None.
+         max_memory (dict | None): A dictionary mapping device identifiers to
+             maximum memory. Defaults to the maximum memory available on each
+             GPU and the available CPU RAM if unset. Defaults to None.
+         no_split_module_classes (List[str] | None): A list of layer class names
+             that should never be split across devices (for instance, any layer
+             that has a residual connection). If None, try to get the settings
+             from the model class. Defaults to None.
+         offload_folder (str | None): If the `device_map` contains the value
+             `"disk"`, the folder where the weights will be offloaded.
+         offload_buffers (bool): In the layers that are offloaded on the CPU
+             or the hard drive, whether or not to offload the buffers as
+             well as the parameters. Defaults to False.
+         preload_module_classes (List[str] | None): A list of classes whose
+             instances should load all their weights (even in the submodules) at
+             the beginning of the forward. This should only be used for classes
+             that have submodules which are registered but not called directly
+             during the forward, for instance if a `dense` linear layer is
+             registered, but at forward, `dense.weight` and `dense.bias` are
+             used in some operations instead of calling `dense` directly.
+             Defaults to None.
+     """
+     from accelerate import dispatch_model, infer_auto_device_map
+
+     # Check valid device_map string.
+     valid_map_option = ['auto', 'balanced', 'balanced_low_0', 'sequential']
+     if isinstance(device_map, str) and device_map not in valid_map_option:
+         raise ValueError('If passing a string for `device_map`, please choose '
+                          f'from {valid_map_option}.')
+
+     # Generate device map automatically
+     if isinstance(device_map, str):
+         if no_split_module_classes is None:
+             no_split_module_classes = getattr(model, '_no_split_modules', None)
+         if no_split_module_classes is None:
+             raise ValueError(f'{model.__class__.__name__} does not support '
+                              f"`device_map='{device_map}'` yet.")
+
+         if device_map != 'sequential':
+             from accelerate.utils import get_balanced_memory
+             max_memory = get_balanced_memory(
+                 model,
+                 max_memory=max_memory,
+                 no_split_module_classes=no_split_module_classes,
+                 dtype=None,
+                 low_zero=(device_map == 'balanced_low_0'),
+             )
+             max_memory[0] *= 0.9
+         device_map = infer_auto_device_map(
+             model,
+             max_memory=max_memory,
+             no_split_module_classes=no_split_module_classes,
+             dtype=None,
+         )
+
+     if 'disk' in device_map.values():
+         if offload_folder is None:
+             raise ValueError(
+                 'The current `device_map` had weights offloaded to the disk. '
+                 'Please provide an `offload_folder` for them.')
+         os.makedirs(offload_folder, exist_ok=True)
+
+     main_device = next(
+         (d for d in device_map.values() if d not in ['cpu', 'disk']), 'cpu')
+
+     model = dispatch_model(
+         model,
+         device_map=device_map,
+         main_device=main_device,
+         offload_dir=offload_folder,
+         offload_buffers=offload_buffers,
+         preload_module_classes=preload_module_classes,
+     )
+     if hasattr(model, 'data_preprocessor'):
+         model.data_preprocessor._device = torch.device(main_device)
+     return model
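# Illustration (not part of the uploaded diff): a hedged sketch of sharding a
# large model across the available devices. Requires the `accelerate`
# package; the model name is a placeholder from the multimodal model zoo, and
# `get_model` routes `device_map`/`offload_folder` to `dispatch_model`.
from mmpretrain import get_model

model = get_model('blip2-opt2.7b_3rdparty-zeroshot_caption',
                  pretrained=True,
                  device_map='auto',
                  offload_folder='./offload')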
+
+
+ @contextmanager
+ def init_empty_weights(include_buffers: bool = False):
+     """A context manager under which models are initialized with all
+     parameters on the meta device.
+
+     With this context manager, we can create an empty model. Useful when just
+     initializing the model would blow the available RAM.
+
+     Besides moving the parameters to the meta device, this context manager
+     also skips loading checkpoints via `mmengine.runner.load_checkpoint` and
+     `transformers.PreTrainedModel.from_pretrained`.
+
+     Modified from https://github.com/huggingface/accelerate
+
+     Args:
+         include_buffers (bool): Whether to also put all buffers on the meta
+             device during initialization.
+     """
+     device = torch.device('meta')
+
+     # move parameters and buffers to the meta device
+     old_register_parameter = nn.Module.register_parameter
+     if include_buffers:
+         old_register_buffer = nn.Module.register_buffer
+         # See https://github.com/huggingface/accelerate/pull/699
+         tensor_constructors_to_patch = {
+             torch_function_name: getattr(torch, torch_function_name)
+             for torch_function_name in ['empty', 'zeros', 'ones', 'full']
+         }
+
+     def register_parameter(module, name, param):
+         old_register_parameter(module, name, param)
+         if param is not None:
+             param_cls = type(module._parameters[name])
+             kwargs = module._parameters[name].__dict__
+             module._parameters[name] = param_cls(
+                 module._parameters[name].to(device), **kwargs)
+
+     def register_buffer(module, name, buffer, *args, **kwargs):
+         old_register_buffer(module, name, buffer, *args, **kwargs)
+         if buffer is not None:
+             module._buffers[name] = module._buffers[name].to(device)
+
+     def patch_tensor_constructor(fn):
+
+         def wrapper(*args, **kwargs):
+             kwargs['device'] = device
+             return fn(*args, **kwargs)
+
+         return wrapper
+
+     # Patch load_checkpoint
+     import mmengine.runner.checkpoint as mmengine_load
+     old_load_checkpoint = mmengine_load.load_checkpoint
+
+     def patch_load_checkpoint(*args, **kwargs):
+         return {}
+
+     # Patch transformers `from_pretrained`
+     try:
+         from transformers import PreTrainedModel
+         from transformers.models.auto.auto_factory import (AutoConfig,
+                                                            _BaseAutoModelClass)
+         with_transformers = True
+     except ImportError:
+         with_transformers = False
+
+     @classmethod
+     def patch_auto_model(cls, pretrained_model_name_or_path, *model_args,
+                          **kwargs):
+         cfg = AutoConfig.from_pretrained(pretrained_model_name_or_path,
+                                          *model_args, **kwargs)
+         return cls.from_config(cfg)
+
+     @classmethod
+     def patch_pretrained_model(cls, pretrained_model_name_or_path, *model_args,
+                                **kwargs):
+         cfg = cls.config_class.from_pretrained(pretrained_model_name_or_path,
+                                                *model_args, **kwargs)
+         return cls(cfg)
+
+     if with_transformers:
+         old_pretrained_model = PreTrainedModel.from_pretrained
+         old_auto_model = _BaseAutoModelClass.from_pretrained
+
+     try:
+         nn.Module.register_parameter = register_parameter
+         mmengine_load.load_checkpoint = patch_load_checkpoint
+         if with_transformers:
+             PreTrainedModel.from_pretrained = patch_pretrained_model
+             _BaseAutoModelClass.from_pretrained = patch_auto_model
+         if include_buffers:
+             nn.Module.register_buffer = register_buffer
+             for func in tensor_constructors_to_patch.keys():
+                 tensor_constructor = patch_tensor_constructor(
+                     getattr(torch, func))
+                 setattr(torch, func, tensor_constructor)
+         yield
+     finally:
+         nn.Module.register_parameter = old_register_parameter
+         mmengine_load.load_checkpoint = old_load_checkpoint
+         if with_transformers:
+             PreTrainedModel.from_pretrained = old_pretrained_model
+             _BaseAutoModelClass.from_pretrained = old_auto_model
+         if include_buffers:
+             nn.Module.register_buffer = old_register_buffer
+             for func, ori in tensor_constructors_to_patch.items():
+                 setattr(torch, func, ori)
+
+
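# Illustration (not part of the uploaded diff): a hedged sketch of building a
# weight-less model skeleton with the context manager above. Parameters land
# on the meta device and checkpoint loading is skipped, so even very large
# models can be instantiated without much RAM; the model name is a placeholder.
from mmpretrain import get_model
from mmpretrain.apis.utils import init_empty_weights

with init_empty_weights():
    skeleton = get_model('resnet50_8xb32_in1k')
print(next(skeleton.parameters()).device)  # meta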
+ def compute_module_sizes(
+         model: nn.Module,
+         dtype: Union[str, torch.dtype, None] = None,
+         special_dtypes: Optional[Dict[str, Union[str, torch.dtype]]] = None):
+     """Compute the size of each submodule of a given model."""
+
+     def get_dtype(dtype):
+         if isinstance(dtype, str):
+             dtype = getattr(torch, dtype)
+         if dtype is not None:
+             assert isinstance(dtype, torch.dtype)
+         return dtype
+
+     def dtype_bytes(dtype: torch.dtype):
+         if dtype is torch.bool:
+             return 1
+         if dtype.is_floating_point:
+             return torch.finfo(dtype).bits / 8
+         else:
+             return torch.iinfo(dtype).bits / 8
+
+     if dtype is not None:
+         dtype = get_dtype(dtype)
+         dtype_size = dtype_bytes(dtype)
+
+     if special_dtypes is not None:
+         special_dtypes = {
+             key: dtype_bytes(get_dtype(dtype))
+             for key, dtype in special_dtypes.items()
+         }
+
+     module_sizes = defaultdict(int)
+     for name, tensor in chain(
+             model.named_parameters(recurse=True),
+             model.named_buffers(recurse=True)):
+         if special_dtypes is not None and name in special_dtypes:
+             size = tensor.numel() * special_dtypes[name]
+         elif dtype is None:
+             size = tensor.numel() * tensor.element_size()
+         else:
+             size = tensor.numel() * min(dtype_size, tensor.element_size())
+         name_parts = name.split('.')
+         for idx in range(len(name_parts) + 1):
+             module_sizes['.'.join(name_parts[:idx])] += size
+
+     return module_sizes
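# Illustration (not part of the uploaded diff): a hedged, self-contained
# sketch of `compute_module_sizes` above on a toy module. Sizes are in bytes
# and accumulate up the module hierarchy (the '' key is the whole model).
import torch.nn as nn
from mmpretrain.apis.utils import compute_module_sizes

toy = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
sizes = compute_module_sizes(toy)
print(sizes[''], sizes['0'], sizes['0.weight'])
print(compute_module_sizes(toy, dtype='float16')[''])  # size as if cast to fp16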
mmpretrain/apis/visual_grounding.py ADDED
@@ -0,0 +1,180 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from pathlib import Path
+ from typing import Callable, List, Optional, Union
+
+ import numpy as np
+ from mmcv.image import imread
+ from mmengine.config import Config
+ from mmengine.dataset import Compose, default_collate
+
+ from mmpretrain.registry import TRANSFORMS
+ from mmpretrain.structures import DataSample
+ from .base import BaseInferencer
+ from .model import list_models
+
+
+ class VisualGroundingInferencer(BaseInferencer):
+     """The inferencer for visual grounding.
+
+     Args:
+         model (BaseModel | str | Config): A model name or a path to the config
+             file, or a :obj:`BaseModel` object. The model name can be found
+             by ``VisualGroundingInferencer.list_models()`` and you can also
+             query it in :doc:`/modelzoo_statistics`.
+         pretrained (str, optional): Path to the checkpoint. If None, it will
+             try to find a pre-defined weight from the model you specified
+             (only work if the ``model`` is a model name). Defaults to None.
+         device (str, optional): Device to run inference. If None, the available
+             device will be automatically used. Defaults to None.
+         **kwargs: Other keyword arguments to initialize the model (only work if
+             the ``model`` is a model name).
+
+     Example:
+         >>> from mmpretrain import VisualGroundingInferencer
+         >>> inferencer = VisualGroundingInferencer('ofa-base_3rdparty_refcoco')
+         >>> inferencer('demo/cat-dog.png', 'dog')[0]
+         {'pred_bboxes': tensor([[ 36.6000,  29.6000, 355.8000, 395.2000]])}
+     """  # noqa: E501
+
+     visualize_kwargs: set = {
+         'resize', 'show', 'show_dir', 'wait_time', 'line_width', 'bbox_color'
+     }
+
+     def __call__(self,
+                  images: Union[str, np.ndarray, list],
+                  texts: Union[str, list],
+                  return_datasamples: bool = False,
+                  batch_size: int = 1,
+                  **kwargs) -> dict:
+         """Call the inferencer.
+
+         Args:
+             images (str | array | list): The image path or array, or a list of
+                 images.
+             texts (str | list): The text to do visual grounding.
+             return_datasamples (bool): Whether to return results as
+                 :obj:`DataSample`. Defaults to False.
+             batch_size (int): Batch size. Defaults to 1.
+             resize (int, optional): Resize the short edge of the image to the
+                 specified length before visualization. Defaults to None.
+             show (bool): Whether to display the visualization result in a
+                 window. Defaults to False.
+             wait_time (float): The display time (s). Defaults to 0, which means
+                 "forever".
+             show_dir (str, optional): If not None, save the visualization
+                 results in the specified directory. Defaults to None.
+             line_width (int): The line width of the bbox. Defaults to 3.
+             bbox_color (str | tuple): The color of the bbox.
+                 Defaults to 'green'.
+
+         Returns:
+             list: The inference results.
+         """
+         if not isinstance(images, (list, tuple)):
+             assert isinstance(texts, str)
+             inputs = [{'img': images, 'text': texts}]
+         else:
+             inputs = []
+             for i in range(len(images)):
+                 input_ = {'img': images[i], 'text': texts[i]}
+                 inputs.append(input_)
+
+         return super().__call__(inputs, return_datasamples, batch_size,
+                                 **kwargs)
+
+     def _init_pipeline(self, cfg: Config) -> Callable:
+         test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+         if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
+             # Image loading is finished in `self.preprocess`.
+             test_pipeline_cfg = test_pipeline_cfg[1:]
+         test_pipeline = Compose(
+             [TRANSFORMS.build(t) for t in test_pipeline_cfg])
+         return test_pipeline
+
+     def preprocess(self, inputs: List[dict], batch_size: int = 1):
+
+         def load_image(input_: dict):
+             img = imread(input_['img'])
+             if img is None:
+                 raise ValueError(f'Failed to read image {input_}.')
+             return {**input_, 'img': img}
+
+         pipeline = Compose([load_image, self.pipeline])
+
+         chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
+         yield from map(default_collate, chunked_data)
+
+     def visualize(self,
+                   ori_inputs: List[dict],
+                   preds: List[DataSample],
+                   show: bool = False,
+                   wait_time: int = 0,
+                   resize: Optional[int] = None,
+                   line_width: int = 3,
+                   bbox_color: Union[str, tuple] = 'green',
+                   show_dir=None):
+         if not show and show_dir is None:
+             return None
+
+         if self.visualizer is None:
+             from mmpretrain.visualization import UniversalVisualizer
+             self.visualizer = UniversalVisualizer()
+
+         visualization = []
+         for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
+             image = imread(input_['img'])
+             if isinstance(input_['img'], str):
+                 # The image loaded from a path is in BGR format.
+                 image = image[..., ::-1]
+                 name = Path(input_['img']).stem
+             else:
+                 name = str(i)
+
+             if show_dir is not None:
+                 show_dir = Path(show_dir)
+                 show_dir.mkdir(exist_ok=True)
+                 out_file = str((show_dir / name).with_suffix('.png'))
+             else:
+                 out_file = None
+
+             self.visualizer.visualize_visual_grounding(
+                 image,
+                 data_sample,
+                 resize=resize,
+                 show=show,
+                 wait_time=wait_time,
+                 line_width=line_width,
+                 bbox_color=bbox_color,
+                 name=name,
+                 out_file=out_file)
+             visualization.append(self.visualizer.get_image())
+             if show:
+                 self.visualizer.close()
+         return visualization
+
+     def postprocess(self,
+                     preds: List[DataSample],
+                     visualization: List[np.ndarray],
+                     return_datasamples=False) -> dict:
+         if return_datasamples:
+             return preds
+
+         results = []
+         for data_sample in preds:
+             results.append({'pred_bboxes': data_sample.get('pred_bboxes')})
+
+         return results
+
+     @staticmethod
+     def list_models(pattern: Optional[str] = None):
+         """List all available model names.
+
+         Args:
+             pattern (str | None): A wildcard pattern to match model names.
+
+         Returns:
+             List[str]: a list of model names.
+         """
+         return list_models(pattern=pattern, task='Visual Grounding')
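# Illustration (not part of the uploaded diff): a hedged sketch matching the
# docstring example above; the image path is a placeholder and the predicted
# box is returned in (x1, y1, x2, y2) pixel coordinates.
from mmpretrain import VisualGroundingInferencer

inferencer = VisualGroundingInferencer('ofa-base_3rdparty_refcoco')
result = inferencer('demo/cat-dog.png', 'dog', show_dir='./vg_out/')[0]
print(result['pred_bboxes'])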
mmpretrain/apis/visual_question_answering.py ADDED
@@ -0,0 +1,181 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from pathlib import Path
3
+ from typing import Callable, List, Optional, Union
4
+
5
+ import numpy as np
6
+ from mmcv.image import imread
7
+ from mmengine.config import Config
8
+ from mmengine.dataset import Compose, default_collate
9
+
10
+ from mmpretrain.registry import TRANSFORMS
11
+ from mmpretrain.structures import DataSample
12
+ from .base import BaseInferencer
13
+ from .model import list_models
14
+
15
+
16
+ class VisualQuestionAnsweringInferencer(BaseInferencer):
17
+ """The inferencer for visual question answering.
18
+
19
+ Args:
20
+ model (BaseModel | str | Config): A model name or a path to the config
21
+ file, or a :obj:`BaseModel` object. The model name can be found
22
+ by ``VisualQuestionAnsweringInferencer.list_models()`` and you can
23
+ also query it in :doc:`/modelzoo_statistics`.
24
+ pretrained (str, optional): Path to the checkpoint. If None, it will
25
+ try to find a pre-defined weight from the model you specified
26
+ (only work if the ``model`` is a model name). Defaults to None.
27
+ device (str, optional): Device to run inference. If None, the available
28
+ device will be automatically used. Defaults to None.
29
+ **kwargs: Other keyword arguments to initialize the model (only work if
30
+ the ``model`` is a model name).
31
+
32
+ Example:
33
+ >>> from mmpretrain import VisualQuestionAnsweringInferencer
34
+ >>> inferencer = VisualQuestionAnsweringInferencer('ofa-base_3rdparty-zeroshot_vqa')
35
+ >>> inferencer('demo/cat-dog.png', "What's the animal next to the dog?")[0]
36
+ {'question': "What's the animal next to the dog?", 'pred_answer': 'cat'}
37
+ """ # noqa: E501
38
+
39
+ visualize_kwargs: set = {'resize', 'show', 'show_dir', 'wait_time'}
40
+
41
+ def __call__(self,
42
+ images: Union[str, np.ndarray, list],
43
+ questions: Union[str, list],
44
+ return_datasamples: bool = False,
45
+ batch_size: int = 1,
46
+ objects: Optional[List[str]] = None,
47
+ **kwargs) -> dict:
48
+ """Call the inferencer.
49
+
50
+ Args:
51
+ images (str | array | list): The image path or array, or a list of
52
+ images.
53
+ questions (str | list): The question to the corresponding image.
54
+ return_datasamples (bool): Whether to return results as
55
+ :obj:`DataSample`. Defaults to False.
56
+ batch_size (int): Batch size. Defaults to 1.
57
+ objects (List[List[str]], optional): Some algorithms like OFA
58
+ fine-tuned VQA models require an extra object description list
59
+ for every image. Defaults to None.
60
+ resize (int, optional): Resize the short edge of the image to the
61
+ specified length before visualization. Defaults to None.
62
+ show (bool): Whether to display the visualization result in a
63
+ window. Defaults to False.
64
+ wait_time (float): The display time (s). Defaults to 0, which means
65
+ "forever".
66
+ show_dir (str, optional): If not None, save the visualization
67
+ results in the specified directory. Defaults to None.
68
+
69
+ Returns:
70
+ list: The inference results.
71
+ """
72
+ if not isinstance(images, (list, tuple)):
73
+ assert isinstance(questions, str)
74
+ inputs = [{'img': images, 'question': questions}]
75
+ if objects is not None:
76
+ assert isinstance(objects[0], str)
77
+ inputs[0]['objects'] = objects
78
+ else:
79
+ inputs = []
80
+ for i in range(len(images)):
81
+ input_ = {'img': images[i], 'question': questions[i]}
82
+ if objects is not None:
83
+ input_['objects'] = objects[i]
84
+ inputs.append(input_)
85
+
86
+ return super().__call__(inputs, return_datasamples, batch_size,
87
+ **kwargs)
88
+
89
+ def _init_pipeline(self, cfg: Config) -> Callable:
90
+ test_pipeline_cfg = cfg.test_dataloader.dataset.pipeline
91
+ if test_pipeline_cfg[0]['type'] == 'LoadImageFromFile':
92
+ # Image loading is finished in `self.preprocess`.
93
+ test_pipeline_cfg = test_pipeline_cfg[1:]
94
+ test_pipeline = Compose(
95
+ [TRANSFORMS.build(t) for t in test_pipeline_cfg])
96
+ return test_pipeline
97
+
98
+ def preprocess(self, inputs: List[dict], batch_size: int = 1):
99
+
100
+ def load_image(input_: dict):
101
+ img = imread(input_['img'])
102
+ if img is None:
103
+ raise ValueError(f'Failed to read image {input_}.')
104
+ return {**input_, 'img': img}
105
+
106
+ pipeline = Compose([load_image, self.pipeline])
107
+
108
+ chunked_data = self._get_chunk_data(map(pipeline, inputs), batch_size)
109
+ yield from map(default_collate, chunked_data)
110
+
111
+ def visualize(self,
112
+ ori_inputs: List[dict],
113
+ preds: List[DataSample],
114
+ show: bool = False,
115
+ wait_time: int = 0,
116
+ resize: Optional[int] = None,
117
+ show_dir=None):
118
+ if not show and show_dir is None:
119
+ return None
120
+
121
+ if self.visualizer is None:
122
+ from mmpretrain.visualization import UniversalVisualizer
123
+ self.visualizer = UniversalVisualizer()
124
+
125
+ visualization = []
126
+ for i, (input_, data_sample) in enumerate(zip(ori_inputs, preds)):
127
+ image = imread(input_['img'])
128
+ if isinstance(input_['img'], str):
129
+ # The image loaded from path is BGR format.
130
+ image = image[..., ::-1]
131
+ name = Path(input_['img']).stem
132
+ else:
133
+ name = str(i)
134
+
135
+ if show_dir is not None:
136
+ show_dir = Path(show_dir)
137
+ show_dir.mkdir(exist_ok=True)
138
+ out_file = str((show_dir / name).with_suffix('.png'))
139
+ else:
140
+ out_file = None
141
+
142
+ self.visualizer.visualize_vqa(
143
+ image,
144
+ data_sample,
145
+ resize=resize,
146
+ show=show,
147
+ wait_time=wait_time,
148
+ name=name,
149
+ out_file=out_file)
150
+ visualization.append(self.visualizer.get_image())
151
+ if show:
152
+ self.visualizer.close()
153
+ return visualization
154
+
155
+ def postprocess(self,
156
+ preds: List[DataSample],
157
+ visualization: List[np.ndarray],
158
+ return_datasamples=False) -> list:
159
+ if return_datasamples:
160
+ return preds
161
+
162
+ results = []
163
+ for data_sample in preds:
164
+ results.append({
165
+ 'question': data_sample.get('question'),
166
+ 'pred_answer': data_sample.get('pred_answer'),
167
+ })
168
+
169
+ return results
170
+
171
+ @staticmethod
172
+ def list_models(pattern: Optional[str] = None):
173
+ """List all available model names.
174
+
175
+ Args:
176
+ pattern (str | None): A wildcard pattern to match model names.
177
+
178
+ Returns:
179
+ List[str]: a list of model names.
180
+ """
181
+ return list_models(pattern=pattern, task='Visual Question Answering')
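Building on the single-image example in the class docstring, a minimal batched-usage sketch; the second image path and its question are placeholders:

    from mmpretrain.apis import VisualQuestionAnsweringInferencer

    inferencer = VisualQuestionAnsweringInferencer(
        'ofa-base_3rdparty-zeroshot_vqa')
    # One question per image; results are returned in the same order.
    results = inferencer(
        ['demo/cat-dog.png', 'demo/bird.JPEG'],  # second path is a placeholder
        ["What's the animal next to the dog?", 'What is the bird doing?'],
        batch_size=2)
    for res in results:
        print(res['question'], '->', res['pred_answer'])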
mmpretrain/datasets/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmpretrain.utils.dependency import WITH_MULTIMODAL
3
+ from .base_dataset import BaseDataset
4
+ from .builder import build_dataset
5
+ from .caltech101 import Caltech101
6
+ from .cifar import CIFAR10, CIFAR100
7
+ from .cub import CUB
8
+ from .custom import CustomDataset
9
+ from .dataset_wrappers import KFoldDataset
10
+ from .dtd import DTD
11
+ from .fgvcaircraft import FGVCAircraft
12
+ from .flowers102 import Flowers102
13
+ from .food101 import Food101
14
+ from .imagenet import ImageNet, ImageNet21k
15
+ from .inshop import InShop
16
+ from .mnist import MNIST, FashionMNIST
17
+ from .multi_label import MultiLabelDataset
18
+ from .multi_task import MultiTaskDataset
19
+ from .nlvr2 import NLVR2
20
+ from .oxfordiiitpet import OxfordIIITPet
21
+ from .places205 import Places205
22
+ from .samplers import * # noqa: F401,F403
23
+ from .stanfordcars import StanfordCars
24
+ from .sun397 import SUN397
25
+ from .transforms import * # noqa: F401,F403
26
+ from .voc import VOC
27
+
28
+ __all__ = [
29
+ 'BaseDataset', 'CIFAR10', 'CIFAR100', 'CUB', 'Caltech101', 'CustomDataset',
30
+ 'DTD', 'FGVCAircraft', 'FashionMNIST', 'Flowers102', 'Food101', 'ImageNet',
31
+ 'ImageNet21k', 'InShop', 'KFoldDataset', 'MNIST', 'MultiLabelDataset',
32
+ 'MultiTaskDataset', 'NLVR2', 'OxfordIIITPet', 'Places205', 'SUN397',
33
+ 'StanfordCars', 'VOC', 'build_dataset'
34
+ ]
35
+
36
+ if WITH_MULTIMODAL:
37
+ from .coco_caption import COCOCaption
38
+ from .coco_retrieval import COCORetrieval
39
+ from .coco_vqa import COCOVQA
40
+ from .flamingo import FlamingoEvalCOCOCaption, FlamingoEvalCOCOVQA
41
+ from .refcoco import RefCOCO
42
+ from .scienceqa import ScienceQA
43
+ from .visual_genome import VisualGenomeQA
44
+
45
+ __all__.extend([
46
+ 'COCOCaption',
47
+ 'COCORetrieval',
48
+ 'COCOVQA',
49
+ 'FlamingoEvalCOCOCaption',
50
+ 'FlamingoEvalCOCOVQA',
51
+ 'RefCOCO',
52
+ 'VisualGenomeQA',
53
+ 'ScienceQA',
54
+ ])
mmpretrain/datasets/base_dataset.py ADDED
@@ -0,0 +1,219 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os.path as osp
3
+ from os import PathLike
4
+ from typing import List, Optional, Sequence, Union
5
+
6
+ import mmengine
7
+ import numpy as np
8
+ from mmengine.dataset import BaseDataset as _BaseDataset
9
+
10
+ from mmpretrain.registry import DATASETS, TRANSFORMS
11
+
12
+
13
+ def expanduser(path):
14
+ """Expand ~ and ~user constructions.
15
+
16
+ If user or $HOME is unknown, do nothing.
17
+ """
18
+ if isinstance(path, (str, PathLike)):
19
+ return osp.expanduser(path)
20
+ else:
21
+ return path
22
+
23
+
24
+ @DATASETS.register_module()
25
+ class BaseDataset(_BaseDataset):
26
+ """Base dataset for image classification task.
27
+
28
+ This dataset supports annotation files in `OpenMMLab 2.0 style annotation
29
+ format`_.
30
+
31
+ .. _OpenMMLab 2.0 style annotation format:
32
+ https://github.com/open-mmlab/mmengine/blob/main/docs/zh_cn/tutorials/basedataset.md
33
+
34
+ Compared with :class:`mmengine.BaseDataset`, this class implements
35
+ several useful methods.
36
+
37
+ Args:
38
+ ann_file (str): Annotation file path.
39
+ metainfo (dict, optional): Meta information for dataset, such as class
40
+ information. Defaults to None.
41
+ data_root (str): The root directory for ``data_prefix`` and
42
+ ``ann_file``. Defaults to ''.
43
+ data_prefix (str | dict): Prefix for training data. Defaults to ''.
44
+ filter_cfg (dict, optional): Config for filter data. Defaults to None.
45
+ indices (int or Sequence[int], optional): Support using the first few
46
+ data in the annotation file to facilitate training/testing on a smaller
47
+ dataset. Defaults to None, which means using all ``data_infos``.
48
+ serialize_data (bool): Whether to hold memory using serialized objects.
49
+ When enabled, data loader workers can use shared RAM from the master
50
+ process instead of making a copy. Defaults to True.
51
+ pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
52
+ test_mode (bool, optional): ``test_mode=True`` means in the test phase,
53
+ where an error will be raised when getting an item fails; ``test_mode=False``
54
+ means in the training phase, where another item will be returned randomly.
55
+ Defaults to False.
56
+ lazy_init (bool): Whether to load annotation during instantiation.
57
+ In some cases, such as visualization, only the meta information of
58
+ the dataset is needed, so it is not necessary to load the annotation
59
+ file. ``BaseDataset`` can skip loading annotations to save time by
60
+ setting ``lazy_init=True``. Defaults to False.
61
+ max_refetch (int): If ``BaseDataset.prepare_data`` gets a None image,
63
+ the maximum number of extra cycles to fetch a valid one.
63
+ Defaults to 1000.
64
+ classes (str | Sequence[str], optional): Specify names of classes.
65
+
66
+ - If it is a string, it should be a file path, and every line of
67
+ the file is the name of a class.
68
+ - If it is a sequence of strings, every item is the name of a class.
69
+ - If it is None, use the categories information in the ``metainfo`` argument,
70
+ annotation file or the class attribute ``METAINFO``.
71
+
72
+ Defaults to None.
73
+ """ # noqa: E501
74
+
75
+ def __init__(self,
76
+ ann_file: str,
77
+ metainfo: Optional[dict] = None,
78
+ data_root: str = '',
79
+ data_prefix: Union[str, dict] = '',
80
+ filter_cfg: Optional[dict] = None,
81
+ indices: Optional[Union[int, Sequence[int]]] = None,
82
+ serialize_data: bool = True,
83
+ pipeline: Sequence = (),
84
+ test_mode: bool = False,
85
+ lazy_init: bool = False,
86
+ max_refetch: int = 1000,
87
+ classes: Union[str, Sequence[str], None] = None):
88
+ if isinstance(data_prefix, str):
89
+ data_prefix = dict(img_path=expanduser(data_prefix))
90
+
91
+ ann_file = expanduser(ann_file)
92
+ metainfo = self._compat_classes(metainfo, classes)
93
+
94
+ transforms = []
95
+ for transform in pipeline:
96
+ if isinstance(transform, dict):
97
+ transforms.append(TRANSFORMS.build(transform))
98
+ else:
99
+ transforms.append(transform)
100
+
101
+ super().__init__(
102
+ ann_file=ann_file,
103
+ metainfo=metainfo,
104
+ data_root=data_root,
105
+ data_prefix=data_prefix,
106
+ filter_cfg=filter_cfg,
107
+ indices=indices,
108
+ serialize_data=serialize_data,
109
+ pipeline=transforms,
110
+ test_mode=test_mode,
111
+ lazy_init=lazy_init,
112
+ max_refetch=max_refetch)
113
+
114
+ @property
115
+ def img_prefix(self):
116
+ """The prefix of images."""
117
+ return self.data_prefix['img_path']
118
+
119
+ @property
120
+ def CLASSES(self):
121
+ """Return all categories names."""
122
+ return self._metainfo.get('classes', None)
123
+
124
+ @property
125
+ def class_to_idx(self):
126
+ """Map mapping class name to class index.
127
+
128
+ Returns:
129
+ dict: mapping from class name to class index.
130
+ """
131
+
132
+ return {cat: i for i, cat in enumerate(self.CLASSES)}
133
+
134
+ def get_gt_labels(self):
135
+ """Get all ground-truth labels (categories).
136
+
137
+ Returns:
138
+ np.ndarray: categories for all images.
139
+ """
140
+
141
+ gt_labels = np.array(
142
+ [self.get_data_info(i)['gt_label'] for i in range(len(self))])
143
+ return gt_labels
144
+
145
+ def get_cat_ids(self, idx: int) -> List[int]:
146
+ """Get category id by index.
147
+
148
+ Args:
149
+ idx (int): Index of data.
150
+
151
+ Returns:
152
+ cat_ids (List[int]): Image category of specified index.
153
+ """
154
+
155
+ return [int(self.get_data_info(idx)['gt_label'])]
156
+
157
+ def _compat_classes(self, metainfo, classes):
158
+ """Merge the old style ``classes`` arguments to ``metainfo``."""
159
+ if isinstance(classes, str):
160
+ # take it as a file path
161
+ class_names = mmengine.list_from_file(expanduser(classes))
162
+ elif isinstance(classes, (tuple, list)):
163
+ class_names = classes
164
+ elif classes is not None:
165
+ raise ValueError(f'Unsupported type {type(classes)} of classes.')
166
+
167
+ if metainfo is None:
168
+ metainfo = {}
169
+
170
+ if classes is not None:
171
+ metainfo = {'classes': tuple(class_names), **metainfo}
172
+
173
+ return metainfo
174
+
175
+ def full_init(self):
176
+ """Load annotation file and set ``BaseDataset._fully_initialized`` to
177
+ True."""
178
+ super().full_init()
179
+
180
+ # To support the standard OpenMMLab 2.0 annotation format. Generate
181
+ # metainfo in internal format from standard metainfo format.
182
+ if 'categories' in self._metainfo and 'classes' not in self._metainfo:
183
+ categories = sorted(
184
+ self._metainfo['categories'], key=lambda x: x['id'])
185
+ self._metainfo['classes'] = tuple(
186
+ [cat['category_name'] for cat in categories])
187
+
188
+ def __repr__(self):
189
+ """Print the basic information of the dataset.
190
+
191
+ Returns:
192
+ str: Formatted string.
193
+ """
194
+ head = 'Dataset ' + self.__class__.__name__
195
+ body = []
196
+ if self._fully_initialized:
197
+ body.append(f'Number of samples: \t{self.__len__()}')
198
+ else:
199
+ body.append("Haven't been initialized")
200
+
201
+ if self.CLASSES is not None:
202
+ body.append(f'Number of categories: \t{len(self.CLASSES)}')
203
+
204
+ body.extend(self.extra_repr())
205
+
206
+ if len(self.pipeline.transforms) > 0:
207
+ body.append('With transforms:')
208
+ for t in self.pipeline.transforms:
209
+ body.append(f' {t}')
210
+
211
+ lines = [head] + [' ' * 4 + line for line in body]
212
+ return '\n'.join(lines)
213
+
214
+ def extra_repr(self) -> List[str]:
215
+ """The extra repr information of the dataset."""
216
+ body = []
217
+ body.append(f'Annotation file: \t{self.ann_file}')
218
+ body.append(f'Prefix of images: \t{self.img_prefix}')
219
+ return body
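To illustrate the contract subclasses implement, here is a hypothetical minimal dataset built on the base class above; the annotation format (one ``path label`` pair per line) and the class names are assumptions made for the example:

    import os.path as osp

    from mmengine import list_from_file

    from mmpretrain.datasets import BaseDataset
    from mmpretrain.registry import DATASETS


    @DATASETS.register_module()
    class ToyDataset(BaseDataset):
        """Hypothetical dataset whose annotation lines are '<path> <label>'."""

        METAINFO = {'classes': ('cat', 'dog')}  # assumed class names

        def load_data_list(self):
            data_list = []
            for line in list_from_file(self.ann_file):
                path, gt_label = line.split()
                data_list.append(
                    dict(img_path=osp.join(self.img_prefix, path),
                         gt_label=int(gt_label)))
            return data_list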
mmpretrain/datasets/builder.py ADDED
@@ -0,0 +1,25 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmpretrain.registry import DATASETS
3
+
4
+
5
+ def build_dataset(cfg):
6
+ """Build dataset.
7
+
8
+ Examples:
9
+ >>> from mmpretrain.datasets import build_dataset
10
+ >>> mnist_train = build_dataset(
11
+ ... dict(type='MNIST', data_prefix='data/mnist/', test_mode=False))
12
+ >>> print(mnist_train)
13
+ Dataset MNIST
14
+ Number of samples: 60000
15
+ Number of categories: 10
16
+ Prefix of data: data/mnist/
17
+ >>> mnist_test = build_dataset(
18
+ ... dict(type='MNIST', data_prefix='data/mnist/', test_mode=True))
19
+ >>> print(mnist_test)
20
+ Dataset MNIST
21
+ Number of samples: 10000
22
+ Number of categories: 10
23
+ Prefix of data: data/mnist/
24
+ """
25
+ return DATASETS.build(cfg)
mmpretrain/datasets/caltech101.py ADDED
@@ -0,0 +1,113 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .base_dataset import BaseDataset
8
+ from .categories import CALTECH101_CATEGORIES
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class Caltech101(BaseDataset):
13
+ """The Caltech101 Dataset.
14
+
15
+ Support the `Caltech101 <https://data.caltech.edu/records/mzrjq-6wc02>`_ Dataset.
16
+ After downloading and decompression, the dataset directory structure is as follows.
17
+
18
+ Caltech101 dataset directory: ::
19
+
20
+ caltech-101
21
+ ├── 101_ObjectCategories
22
+ │ ├── class_x
23
+ │ │ ├── xx1.jpg
24
+ │ │ ├── xx2.jpg
25
+ │ │ └── ...
26
+ │ ├── class_y
27
+ │ │ ├── yy1.jpg
28
+ │ │ ├── yy2.jpg
29
+ │ │ └── ...
30
+ │ └── ...
31
+ ├── Annotations
32
+ │ ├── class_x
33
+ │ │ ├── xx1.mat
34
+ │ │ └── ...
35
+ │ └── ...
36
+ ├── meta
37
+ │ ├── train.txt
38
+ │ └── test.txt
39
+ └── ....
40
+
41
+ Please note that since there is no official split for the training and
42
+ test sets, you can use the train.txt and test.txt provided by us or
43
+ create your own annotation files. Here is the download
44
+ `link <https://download.openmmlab.com/mmpretrain/datasets/caltech_meta.zip>`_
45
+ for the annotations.
46
+
47
+ Args:
48
+ data_root (str): The root directory for the Caltech101 dataset.
49
+ split (str, optional): The dataset split, supports "train" and "test".
50
+ Default to "train".
51
+
52
+ Examples:
53
+ >>> from mmpretrain.datasets import Caltech101
54
+ >>> train_dataset = Caltech101(data_root='data/caltech-101', split='train')
55
+ >>> train_dataset
56
+ Dataset Caltech101
57
+ Number of samples: 3060
58
+ Number of categories: 102
59
+ Root of dataset: data/caltech-101
60
+ >>> test_dataset = Caltech101(data_root='data/caltech-101', split='test')
61
+ >>> test_dataset
62
+ Dataset Caltech101
63
+ Number of samples: 6728
64
+ Number of categories: 102
65
+ Root of dataset: data/caltech-101
66
+ """ # noqa: E501
67
+
68
+ METAINFO = {'classes': CALTECH101_CATEGORIES}
69
+
70
+ def __init__(self, data_root: str, split: str = 'train', **kwargs):
71
+
72
+ splits = ['train', 'test']
73
+ assert split in splits, \
74
+ f"The split must be one of {splits}, but get '{split}'"
75
+ self.split = split
76
+
77
+ self.backend = get_file_backend(data_root, enable_singleton=True)
78
+
79
+ if split == 'train':
80
+ ann_file = self.backend.join_path('meta', 'train.txt')
81
+ else:
82
+ ann_file = self.backend.join_path('meta', 'test.txt')
83
+
84
+ data_prefix = '101_ObjectCategories'
85
+ test_mode = split == 'test'
86
+
87
+ super(Caltech101, self).__init__(
88
+ ann_file=ann_file,
89
+ data_root=data_root,
90
+ data_prefix=data_prefix,
91
+ test_mode=test_mode,
92
+ **kwargs)
93
+
94
+ def load_data_list(self):
95
+ """Load images and ground truth labels."""
96
+
97
+ pairs = list_from_file(self.ann_file)
98
+ data_list = []
99
+
100
+ for pair in pairs:
101
+ path, gt_label = pair.split()
102
+ img_path = self.backend.join_path(self.img_prefix, path)
103
+ info = dict(img_path=img_path, gt_label=int(gt_label))
104
+ data_list.append(info)
105
+
106
+ return data_list
107
+
108
+ def extra_repr(self) -> List[str]:
109
+ """The extra repr information of the dataset."""
110
+ body = [
111
+ f'Root of dataset: \t{self.data_root}',
112
+ ]
113
+ return body
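Since ``load_data_list`` above splits each annotation line into a relative image path and an integer label, a hypothetical excerpt of meta/train.txt would look like the following (class folders and label indices are illustrative):

    accordion/image_0001.jpg 0
    accordion/image_0002.jpg 0
    airplanes/image_0004.jpg 1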
mmpretrain/datasets/categories.py ADDED
@@ -0,0 +1,1440 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ # Pre-defined categories names of various datasets.
3
+
4
+ VOC2007_CATEGORIES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
5
+ 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
6
+ 'horse', 'motorbike', 'person', 'pottedplant', 'sheep',
7
+ 'sofa', 'train', 'tvmonitor')
8
+
9
+ CUB_CATEGORIES = (
10
+ 'Black_footed_Albatross', 'Laysan_Albatross', 'Sooty_Albatross',
11
+ 'Groove_billed_Ani', 'Crested_Auklet', 'Least_Auklet', 'Parakeet_Auklet',
12
+ 'Rhinoceros_Auklet', 'Brewer_Blackbird', 'Red_winged_Blackbird',
13
+ 'Rusty_Blackbird', 'Yellow_headed_Blackbird', 'Bobolink', 'Indigo_Bunting',
14
+ 'Lazuli_Bunting', 'Painted_Bunting', 'Cardinal', 'Spotted_Catbird',
15
+ 'Gray_Catbird', 'Yellow_breasted_Chat', 'Eastern_Towhee',
16
+ 'Chuck_will_Widow', 'Brandt_Cormorant', 'Red_faced_Cormorant',
17
+ 'Pelagic_Cormorant', 'Bronzed_Cowbird', 'Shiny_Cowbird', 'Brown_Creeper',
18
+ 'American_Crow', 'Fish_Crow', 'Black_billed_Cuckoo', 'Mangrove_Cuckoo',
19
+ 'Yellow_billed_Cuckoo', 'Gray_crowned_Rosy_Finch', 'Purple_Finch',
20
+ 'Northern_Flicker', 'Acadian_Flycatcher', 'Great_Crested_Flycatcher',
21
+ 'Least_Flycatcher', 'Olive_sided_Flycatcher', 'Scissor_tailed_Flycatcher',
22
+ 'Vermilion_Flycatcher', 'Yellow_bellied_Flycatcher', 'Frigatebird',
23
+ 'Northern_Fulmar', 'Gadwall', 'American_Goldfinch', 'European_Goldfinch',
24
+ 'Boat_tailed_Grackle', 'Eared_Grebe', 'Horned_Grebe', 'Pied_billed_Grebe',
25
+ 'Western_Grebe', 'Blue_Grosbeak', 'Evening_Grosbeak', 'Pine_Grosbeak',
26
+ 'Rose_breasted_Grosbeak', 'Pigeon_Guillemot', 'California_Gull',
27
+ 'Glaucous_winged_Gull', 'Heermann_Gull', 'Herring_Gull', 'Ivory_Gull',
28
+ 'Ring_billed_Gull', 'Slaty_backed_Gull', 'Western_Gull',
29
+ 'Anna_Hummingbird', 'Ruby_throated_Hummingbird', 'Rufous_Hummingbird',
30
+ 'Green_Violetear', 'Long_tailed_Jaeger', 'Pomarine_Jaeger', 'Blue_Jay',
31
+ 'Florida_Jay', 'Green_Jay', 'Dark_eyed_Junco', 'Tropical_Kingbird',
32
+ 'Gray_Kingbird', 'Belted_Kingfisher', 'Green_Kingfisher',
33
+ 'Pied_Kingfisher', 'Ringed_Kingfisher', 'White_breasted_Kingfisher',
34
+ 'Red_legged_Kittiwake', 'Horned_Lark', 'Pacific_Loon', 'Mallard',
35
+ 'Western_Meadowlark', 'Hooded_Merganser', 'Red_breasted_Merganser',
36
+ 'Mockingbird', 'Nighthawk', 'Clark_Nutcracker', 'White_breasted_Nuthatch',
37
+ 'Baltimore_Oriole', 'Hooded_Oriole', 'Orchard_Oriole', 'Scott_Oriole',
38
+ 'Ovenbird', 'Brown_Pelican', 'White_Pelican', 'Western_Wood_Pewee',
39
+ 'Sayornis', 'American_Pipit', 'Whip_poor_Will', 'Horned_Puffin',
40
+ 'Common_Raven', 'White_necked_Raven', 'American_Redstart', 'Geococcyx',
41
+ 'Loggerhead_Shrike', 'Great_Grey_Shrike', 'Baird_Sparrow',
42
+ 'Black_throated_Sparrow', 'Brewer_Sparrow', 'Chipping_Sparrow',
43
+ 'Clay_colored_Sparrow', 'House_Sparrow', 'Field_Sparrow', 'Fox_Sparrow',
44
+ 'Grasshopper_Sparrow', 'Harris_Sparrow', 'Henslow_Sparrow',
45
+ 'Le_Conte_Sparrow', 'Lincoln_Sparrow', 'Nelson_Sharp_tailed_Sparrow',
46
+ 'Savannah_Sparrow', 'Seaside_Sparrow', 'Song_Sparrow', 'Tree_Sparrow',
47
+ 'Vesper_Sparrow', 'White_crowned_Sparrow', 'White_throated_Sparrow',
48
+ 'Cape_Glossy_Starling', 'Bank_Swallow', 'Barn_Swallow', 'Cliff_Swallow',
49
+ 'Tree_Swallow', 'Scarlet_Tanager', 'Summer_Tanager', 'Artic_Tern',
50
+ 'Black_Tern', 'Caspian_Tern', 'Common_Tern', 'Elegant_Tern',
51
+ 'Forsters_Tern', 'Least_Tern', 'Green_tailed_Towhee', 'Brown_Thrasher',
52
+ 'Sage_Thrasher', 'Black_capped_Vireo', 'Blue_headed_Vireo',
53
+ 'Philadelphia_Vireo', 'Red_eyed_Vireo', 'Warbling_Vireo',
54
+ 'White_eyed_Vireo', 'Yellow_throated_Vireo', 'Bay_breasted_Warbler',
55
+ 'Black_and_white_Warbler', 'Black_throated_Blue_Warbler',
56
+ 'Blue_winged_Warbler', 'Canada_Warbler', 'Cape_May_Warbler',
57
+ 'Cerulean_Warbler', 'Chestnut_sided_Warbler', 'Golden_winged_Warbler',
58
+ 'Hooded_Warbler', 'Kentucky_Warbler', 'Magnolia_Warbler',
59
+ 'Mourning_Warbler', 'Myrtle_Warbler', 'Nashville_Warbler',
60
+ 'Orange_crowned_Warbler', 'Palm_Warbler', 'Pine_Warbler',
61
+ 'Prairie_Warbler', 'Prothonotary_Warbler', 'Swainson_Warbler',
62
+ 'Tennessee_Warbler', 'Wilson_Warbler', 'Worm_eating_Warbler',
63
+ 'Yellow_Warbler', 'Northern_Waterthrush', 'Louisiana_Waterthrush',
64
+ 'Bohemian_Waxwing', 'Cedar_Waxwing', 'American_Three_toed_Woodpecker',
65
+ 'Pileated_Woodpecker', 'Red_bellied_Woodpecker', 'Red_cockaded_Woodpecker',
66
+ 'Red_headed_Woodpecker', 'Downy_Woodpecker', 'Bewick_Wren', 'Cactus_Wren',
67
+ 'Carolina_Wren', 'House_Wren', 'Marsh_Wren', 'Rock_Wren', 'Winter_Wren',
68
+ 'Common_Yellowthroat')
69
+
70
+ IMAGENET_CATEGORIES = (
71
+ 'tench, Tinca tinca',
72
+ 'goldfish, Carassius auratus',
73
+ 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias', # noqa: E501
74
+ 'tiger shark, Galeocerdo cuvieri',
75
+ 'hammerhead, hammerhead shark',
76
+ 'electric ray, crampfish, numbfish, torpedo',
77
+ 'stingray',
78
+ 'cock',
79
+ 'hen',
80
+ 'ostrich, Struthio camelus',
81
+ 'brambling, Fringilla montifringilla',
82
+ 'goldfinch, Carduelis carduelis',
83
+ 'house finch, linnet, Carpodacus mexicanus',
84
+ 'junco, snowbird',
85
+ 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
86
+ 'robin, American robin, Turdus migratorius',
87
+ 'bulbul',
88
+ 'jay',
89
+ 'magpie',
90
+ 'chickadee',
91
+ 'water ouzel, dipper',
92
+ 'kite',
93
+ 'bald eagle, American eagle, Haliaeetus leucocephalus',
94
+ 'vulture',
95
+ 'great grey owl, great gray owl, Strix nebulosa',
96
+ 'European fire salamander, Salamandra salamandra',
97
+ 'common newt, Triturus vulgaris',
98
+ 'eft',
99
+ 'spotted salamander, Ambystoma maculatum',
100
+ 'axolotl, mud puppy, Ambystoma mexicanum',
101
+ 'bullfrog, Rana catesbeiana',
102
+ 'tree frog, tree-frog',
103
+ 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
104
+ 'loggerhead, loggerhead turtle, Caretta caretta',
105
+ 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea', # noqa: E501
106
+ 'mud turtle',
107
+ 'terrapin',
108
+ 'box turtle, box tortoise',
109
+ 'banded gecko',
110
+ 'common iguana, iguana, Iguana iguana',
111
+ 'American chameleon, anole, Anolis carolinensis',
112
+ 'whiptail, whiptail lizard',
113
+ 'agama',
114
+ 'frilled lizard, Chlamydosaurus kingi',
115
+ 'alligator lizard',
116
+ 'Gila monster, Heloderma suspectum',
117
+ 'green lizard, Lacerta viridis',
118
+ 'African chameleon, Chamaeleo chamaeleon',
119
+ 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis', # noqa: E501
120
+ 'African crocodile, Nile crocodile, Crocodylus niloticus',
121
+ 'American alligator, Alligator mississipiensis',
122
+ 'triceratops',
123
+ 'thunder snake, worm snake, Carphophis amoenus',
124
+ 'ringneck snake, ring-necked snake, ring snake',
125
+ 'hognose snake, puff adder, sand viper',
126
+ 'green snake, grass snake',
127
+ 'king snake, kingsnake',
128
+ 'garter snake, grass snake',
129
+ 'water snake',
130
+ 'vine snake',
131
+ 'night snake, Hypsiglena torquata',
132
+ 'boa constrictor, Constrictor constrictor',
133
+ 'rock python, rock snake, Python sebae',
134
+ 'Indian cobra, Naja naja',
135
+ 'green mamba',
136
+ 'sea snake',
137
+ 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
138
+ 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
139
+ 'sidewinder, horned rattlesnake, Crotalus cerastes',
140
+ 'trilobite',
141
+ 'harvestman, daddy longlegs, Phalangium opilio',
142
+ 'scorpion',
143
+ 'black and gold garden spider, Argiope aurantia',
144
+ 'barn spider, Araneus cavaticus',
145
+ 'garden spider, Aranea diademata',
146
+ 'black widow, Latrodectus mactans',
147
+ 'tarantula',
148
+ 'wolf spider, hunting spider',
149
+ 'tick',
150
+ 'centipede',
151
+ 'black grouse',
152
+ 'ptarmigan',
153
+ 'ruffed grouse, partridge, Bonasa umbellus',
154
+ 'prairie chicken, prairie grouse, prairie fowl',
155
+ 'peacock',
156
+ 'quail',
157
+ 'partridge',
158
+ 'African grey, African gray, Psittacus erithacus',
159
+ 'macaw',
160
+ 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
161
+ 'lorikeet',
162
+ 'coucal',
163
+ 'bee eater',
164
+ 'hornbill',
165
+ 'hummingbird',
166
+ 'jacamar',
167
+ 'toucan',
168
+ 'drake',
169
+ 'red-breasted merganser, Mergus serrator',
170
+ 'goose',
171
+ 'black swan, Cygnus atratus',
172
+ 'tusker',
173
+ 'echidna, spiny anteater, anteater',
174
+ 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus', # noqa: E501
175
+ 'wallaby, brush kangaroo',
176
+ 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus', # noqa: E501
177
+ 'wombat',
178
+ 'jellyfish',
179
+ 'sea anemone, anemone',
180
+ 'brain coral',
181
+ 'flatworm, platyhelminth',
182
+ 'nematode, nematode worm, roundworm',
183
+ 'conch',
184
+ 'snail',
185
+ 'slug',
186
+ 'sea slug, nudibranch',
187
+ 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
188
+ 'chambered nautilus, pearly nautilus, nautilus',
189
+ 'Dungeness crab, Cancer magister',
190
+ 'rock crab, Cancer irroratus',
191
+ 'fiddler crab',
192
+ 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica', # noqa: E501
193
+ 'American lobster, Northern lobster, Maine lobster, Homarus americanus', # noqa: E501
194
+ 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish', # noqa: E501
195
+ 'crayfish, crawfish, crawdad, crawdaddy',
196
+ 'hermit crab',
197
+ 'isopod',
198
+ 'white stork, Ciconia ciconia',
199
+ 'black stork, Ciconia nigra',
200
+ 'spoonbill',
201
+ 'flamingo',
202
+ 'little blue heron, Egretta caerulea',
203
+ 'American egret, great white heron, Egretta albus',
204
+ 'bittern',
205
+ 'crane',
206
+ 'limpkin, Aramus pictus',
207
+ 'European gallinule, Porphyrio porphyrio',
208
+ 'American coot, marsh hen, mud hen, water hen, Fulica americana',
209
+ 'bustard',
210
+ 'ruddy turnstone, Arenaria interpres',
211
+ 'red-backed sandpiper, dunlin, Erolia alpina',
212
+ 'redshank, Tringa totanus',
213
+ 'dowitcher',
214
+ 'oystercatcher, oyster catcher',
215
+ 'pelican',
216
+ 'king penguin, Aptenodytes patagonica',
217
+ 'albatross, mollymawk',
218
+ 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus', # noqa: E501
219
+ 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
220
+ 'dugong, Dugong dugon',
221
+ 'sea lion',
222
+ 'Chihuahua',
223
+ 'Japanese spaniel',
224
+ 'Maltese dog, Maltese terrier, Maltese',
225
+ 'Pekinese, Pekingese, Peke',
226
+ 'Shih-Tzu',
227
+ 'Blenheim spaniel',
228
+ 'papillon',
229
+ 'toy terrier',
230
+ 'Rhodesian ridgeback',
231
+ 'Afghan hound, Afghan',
232
+ 'basset, basset hound',
233
+ 'beagle',
234
+ 'bloodhound, sleuthhound',
235
+ 'bluetick',
236
+ 'black-and-tan coonhound',
237
+ 'Walker hound, Walker foxhound',
238
+ 'English foxhound',
239
+ 'redbone',
240
+ 'borzoi, Russian wolfhound',
241
+ 'Irish wolfhound',
242
+ 'Italian greyhound',
243
+ 'whippet',
244
+ 'Ibizan hound, Ibizan Podenco',
245
+ 'Norwegian elkhound, elkhound',
246
+ 'otterhound, otter hound',
247
+ 'Saluki, gazelle hound',
248
+ 'Scottish deerhound, deerhound',
249
+ 'Weimaraner',
250
+ 'Staffordshire bullterrier, Staffordshire bull terrier',
251
+ 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier', # noqa: E501
252
+ 'Bedlington terrier',
253
+ 'Border terrier',
254
+ 'Kerry blue terrier',
255
+ 'Irish terrier',
256
+ 'Norfolk terrier',
257
+ 'Norwich terrier',
258
+ 'Yorkshire terrier',
259
+ 'wire-haired fox terrier',
260
+ 'Lakeland terrier',
261
+ 'Sealyham terrier, Sealyham',
262
+ 'Airedale, Airedale terrier',
263
+ 'cairn, cairn terrier',
264
+ 'Australian terrier',
265
+ 'Dandie Dinmont, Dandie Dinmont terrier',
266
+ 'Boston bull, Boston terrier',
267
+ 'miniature schnauzer',
268
+ 'giant schnauzer',
269
+ 'standard schnauzer',
270
+ 'Scotch terrier, Scottish terrier, Scottie',
271
+ 'Tibetan terrier, chrysanthemum dog',
272
+ 'silky terrier, Sydney silky',
273
+ 'soft-coated wheaten terrier',
274
+ 'West Highland white terrier',
275
+ 'Lhasa, Lhasa apso',
276
+ 'flat-coated retriever',
277
+ 'curly-coated retriever',
278
+ 'golden retriever',
279
+ 'Labrador retriever',
280
+ 'Chesapeake Bay retriever',
281
+ 'German short-haired pointer',
282
+ 'vizsla, Hungarian pointer',
283
+ 'English setter',
284
+ 'Irish setter, red setter',
285
+ 'Gordon setter',
286
+ 'Brittany spaniel',
287
+ 'clumber, clumber spaniel',
288
+ 'English springer, English springer spaniel',
289
+ 'Welsh springer spaniel',
290
+ 'cocker spaniel, English cocker spaniel, cocker',
291
+ 'Sussex spaniel',
292
+ 'Irish water spaniel',
293
+ 'kuvasz',
294
+ 'schipperke',
295
+ 'groenendael',
296
+ 'malinois',
297
+ 'briard',
298
+ 'kelpie',
299
+ 'komondor',
300
+ 'Old English sheepdog, bobtail',
301
+ 'Shetland sheepdog, Shetland sheep dog, Shetland',
302
+ 'collie',
303
+ 'Border collie',
304
+ 'Bouvier des Flandres, Bouviers des Flandres',
305
+ 'Rottweiler',
306
+ 'German shepherd, German shepherd dog, German police dog, alsatian',
307
+ 'Doberman, Doberman pinscher',
308
+ 'miniature pinscher',
309
+ 'Greater Swiss Mountain dog',
310
+ 'Bernese mountain dog',
311
+ 'Appenzeller',
312
+ 'EntleBucher',
313
+ 'boxer',
314
+ 'bull mastiff',
315
+ 'Tibetan mastiff',
316
+ 'French bulldog',
317
+ 'Great Dane',
318
+ 'Saint Bernard, St Bernard',
319
+ 'Eskimo dog, husky',
320
+ 'malamute, malemute, Alaskan malamute',
321
+ 'Siberian husky',
322
+ 'dalmatian, coach dog, carriage dog',
323
+ 'affenpinscher, monkey pinscher, monkey dog',
324
+ 'basenji',
325
+ 'pug, pug-dog',
326
+ 'Leonberg',
327
+ 'Newfoundland, Newfoundland dog',
328
+ 'Great Pyrenees',
329
+ 'Samoyed, Samoyede',
330
+ 'Pomeranian',
331
+ 'chow, chow chow',
332
+ 'keeshond',
333
+ 'Brabancon griffon',
334
+ 'Pembroke, Pembroke Welsh corgi',
335
+ 'Cardigan, Cardigan Welsh corgi',
336
+ 'toy poodle',
337
+ 'miniature poodle',
338
+ 'standard poodle',
339
+ 'Mexican hairless',
340
+ 'timber wolf, grey wolf, gray wolf, Canis lupus',
341
+ 'white wolf, Arctic wolf, Canis lupus tundrarum',
342
+ 'red wolf, maned wolf, Canis rufus, Canis niger',
343
+ 'coyote, prairie wolf, brush wolf, Canis latrans',
344
+ 'dingo, warrigal, warragal, Canis dingo',
345
+ 'dhole, Cuon alpinus',
346
+ 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
347
+ 'hyena, hyaena',
348
+ 'red fox, Vulpes vulpes',
349
+ 'kit fox, Vulpes macrotis',
350
+ 'Arctic fox, white fox, Alopex lagopus',
351
+ 'grey fox, gray fox, Urocyon cinereoargenteus',
352
+ 'tabby, tabby cat',
353
+ 'tiger cat',
354
+ 'Persian cat',
355
+ 'Siamese cat, Siamese',
356
+ 'Egyptian cat',
357
+ 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor', # noqa: E501
358
+ 'lynx, catamount',
359
+ 'leopard, Panthera pardus',
360
+ 'snow leopard, ounce, Panthera uncia',
361
+ 'jaguar, panther, Panthera onca, Felis onca',
362
+ 'lion, king of beasts, Panthera leo',
363
+ 'tiger, Panthera tigris',
364
+ 'cheetah, chetah, Acinonyx jubatus',
365
+ 'brown bear, bruin, Ursus arctos',
366
+ 'American black bear, black bear, Ursus americanus, Euarctos americanus', # noqa: E501
367
+ 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
368
+ 'sloth bear, Melursus ursinus, Ursus ursinus',
369
+ 'mongoose',
370
+ 'meerkat, mierkat',
371
+ 'tiger beetle',
372
+ 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
373
+ 'ground beetle, carabid beetle',
374
+ 'long-horned beetle, longicorn, longicorn beetle',
375
+ 'leaf beetle, chrysomelid',
376
+ 'dung beetle',
377
+ 'rhinoceros beetle',
378
+ 'weevil',
379
+ 'fly',
380
+ 'bee',
381
+ 'ant, emmet, pismire',
382
+ 'grasshopper, hopper',
383
+ 'cricket',
384
+ 'walking stick, walkingstick, stick insect',
385
+ 'cockroach, roach',
386
+ 'mantis, mantid',
387
+ 'cicada, cicala',
388
+ 'leafhopper',
389
+ 'lacewing, lacewing fly',
390
+ "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", # noqa: E501
391
+ 'damselfly',
392
+ 'admiral',
393
+ 'ringlet, ringlet butterfly',
394
+ 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
395
+ 'cabbage butterfly',
396
+ 'sulphur butterfly, sulfur butterfly',
397
+ 'lycaenid, lycaenid butterfly',
398
+ 'starfish, sea star',
399
+ 'sea urchin',
400
+ 'sea cucumber, holothurian',
401
+ 'wood rabbit, cottontail, cottontail rabbit',
402
+ 'hare',
403
+ 'Angora, Angora rabbit',
404
+ 'hamster',
405
+ 'porcupine, hedgehog',
406
+ 'fox squirrel, eastern fox squirrel, Sciurus niger',
407
+ 'marmot',
408
+ 'beaver',
409
+ 'guinea pig, Cavia cobaya',
410
+ 'sorrel',
411
+ 'zebra',
412
+ 'hog, pig, grunter, squealer, Sus scrofa',
413
+ 'wild boar, boar, Sus scrofa',
414
+ 'warthog',
415
+ 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
416
+ 'ox',
417
+ 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
418
+ 'bison',
419
+ 'ram, tup',
420
+ 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis', # noqa: E501
421
+ 'ibex, Capra ibex',
422
+ 'hartebeest',
423
+ 'impala, Aepyceros melampus',
424
+ 'gazelle',
425
+ 'Arabian camel, dromedary, Camelus dromedarius',
426
+ 'llama',
427
+ 'weasel',
428
+ 'mink',
429
+ 'polecat, fitch, foulmart, foumart, Mustela putorius',
430
+ 'black-footed ferret, ferret, Mustela nigripes',
431
+ 'otter',
432
+ 'skunk, polecat, wood pussy',
433
+ 'badger',
434
+ 'armadillo',
435
+ 'three-toed sloth, ai, Bradypus tridactylus',
436
+ 'orangutan, orang, orangutang, Pongo pygmaeus',
437
+ 'gorilla, Gorilla gorilla',
438
+ 'chimpanzee, chimp, Pan troglodytes',
439
+ 'gibbon, Hylobates lar',
440
+ 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
441
+ 'guenon, guenon monkey',
442
+ 'patas, hussar monkey, Erythrocebus patas',
443
+ 'baboon',
444
+ 'macaque',
445
+ 'langur',
446
+ 'colobus, colobus monkey',
447
+ 'proboscis monkey, Nasalis larvatus',
448
+ 'marmoset',
449
+ 'capuchin, ringtail, Cebus capucinus',
450
+ 'howler monkey, howler',
451
+ 'titi, titi monkey',
452
+ 'spider monkey, Ateles geoffroyi',
453
+ 'squirrel monkey, Saimiri sciureus',
454
+ 'Madagascar cat, ring-tailed lemur, Lemur catta',
455
+ 'indri, indris, Indri indri, Indri brevicaudatus',
456
+ 'Indian elephant, Elephas maximus',
457
+ 'African elephant, Loxodonta africana',
458
+ 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
459
+ 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
460
+ 'barracouta, snoek',
461
+ 'eel',
462
+ 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch', # noqa: E501
463
+ 'rock beauty, Holocanthus tricolor',
464
+ 'anemone fish',
465
+ 'sturgeon',
466
+ 'gar, garfish, garpike, billfish, Lepisosteus osseus',
467
+ 'lionfish',
468
+ 'puffer, pufferfish, blowfish, globefish',
469
+ 'abacus',
470
+ 'abaya',
471
+ "academic gown, academic robe, judge's robe",
472
+ 'accordion, piano accordion, squeeze box',
473
+ 'acoustic guitar',
474
+ 'aircraft carrier, carrier, flattop, attack aircraft carrier',
475
+ 'airliner',
476
+ 'airship, dirigible',
477
+ 'altar',
478
+ 'ambulance',
479
+ 'amphibian, amphibious vehicle',
480
+ 'analog clock',
481
+ 'apiary, bee house',
482
+ 'apron',
483
+ 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', # noqa: E501
484
+ 'assault rifle, assault gun',
485
+ 'backpack, back pack, knapsack, packsack, rucksack, haversack',
486
+ 'bakery, bakeshop, bakehouse',
487
+ 'balance beam, beam',
488
+ 'balloon',
489
+ 'ballpoint, ballpoint pen, ballpen, Biro',
490
+ 'Band Aid',
491
+ 'banjo',
492
+ 'bannister, banister, balustrade, balusters, handrail',
493
+ 'barbell',
494
+ 'barber chair',
495
+ 'barbershop',
496
+ 'barn',
497
+ 'barometer',
498
+ 'barrel, cask',
499
+ 'barrow, garden cart, lawn cart, wheelbarrow',
500
+ 'baseball',
501
+ 'basketball',
502
+ 'bassinet',
503
+ 'bassoon',
504
+ 'bathing cap, swimming cap',
505
+ 'bath towel',
506
+ 'bathtub, bathing tub, bath, tub',
507
+ 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon', # noqa: E501
508
+ 'beacon, lighthouse, beacon light, pharos',
509
+ 'beaker',
510
+ 'bearskin, busby, shako',
511
+ 'beer bottle',
512
+ 'beer glass',
513
+ 'bell cote, bell cot',
514
+ 'bib',
515
+ 'bicycle-built-for-two, tandem bicycle, tandem',
516
+ 'bikini, two-piece',
517
+ 'binder, ring-binder',
518
+ 'binoculars, field glasses, opera glasses',
519
+ 'birdhouse',
520
+ 'boathouse',
521
+ 'bobsled, bobsleigh, bob',
522
+ 'bolo tie, bolo, bola tie, bola',
523
+ 'bonnet, poke bonnet',
524
+ 'bookcase',
525
+ 'bookshop, bookstore, bookstall',
526
+ 'bottlecap',
527
+ 'bow',
528
+ 'bow tie, bow-tie, bowtie',
529
+ 'brass, memorial tablet, plaque',
530
+ 'brassiere, bra, bandeau',
531
+ 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
532
+ 'breastplate, aegis, egis',
533
+ 'broom',
534
+ 'bucket, pail',
535
+ 'buckle',
536
+ 'bulletproof vest',
537
+ 'bullet train, bullet',
538
+ 'butcher shop, meat market',
539
+ 'cab, hack, taxi, taxicab',
540
+ 'caldron, cauldron',
541
+ 'candle, taper, wax light',
542
+ 'cannon',
543
+ 'canoe',
544
+ 'can opener, tin opener',
545
+ 'cardigan',
546
+ 'car mirror',
547
+ 'carousel, carrousel, merry-go-round, roundabout, whirligig',
548
+ "carpenter's kit, tool kit",
549
+ 'carton',
550
+ 'car wheel',
551
+ 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM', # noqa: E501
552
+ 'cassette',
553
+ 'cassette player',
554
+ 'castle',
555
+ 'catamaran',
556
+ 'CD player',
557
+ 'cello, violoncello',
558
+ 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
559
+ 'chain',
560
+ 'chainlink fence',
561
+ 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour', # noqa: E501
562
+ 'chain saw, chainsaw',
563
+ 'chest',
564
+ 'chiffonier, commode',
565
+ 'chime, bell, gong',
566
+ 'china cabinet, china closet',
567
+ 'Christmas stocking',
568
+ 'church, church building',
569
+ 'cinema, movie theater, movie theatre, movie house, picture palace',
570
+ 'cleaver, meat cleaver, chopper',
571
+ 'cliff dwelling',
572
+ 'cloak',
573
+ 'clog, geta, patten, sabot',
574
+ 'cocktail shaker',
575
+ 'coffee mug',
576
+ 'coffeepot',
577
+ 'coil, spiral, volute, whorl, helix',
578
+ 'combination lock',
579
+ 'computer keyboard, keypad',
580
+ 'confectionery, confectionary, candy store',
581
+ 'container ship, containership, container vessel',
582
+ 'convertible',
583
+ 'corkscrew, bottle screw',
584
+ 'cornet, horn, trumpet, trump',
585
+ 'cowboy boot',
586
+ 'cowboy hat, ten-gallon hat',
587
+ 'cradle',
588
+ 'crane',
589
+ 'crash helmet',
590
+ 'crate',
591
+ 'crib, cot',
592
+ 'Crock Pot',
593
+ 'croquet ball',
594
+ 'crutch',
595
+ 'cuirass',
596
+ 'dam, dike, dyke',
597
+ 'desk',
598
+ 'desktop computer',
599
+ 'dial telephone, dial phone',
600
+ 'diaper, nappy, napkin',
601
+ 'digital clock',
602
+ 'digital watch',
603
+ 'dining table, board',
604
+ 'dishrag, dishcloth',
605
+ 'dishwasher, dish washer, dishwashing machine',
606
+ 'disk brake, disc brake',
607
+ 'dock, dockage, docking facility',
608
+ 'dogsled, dog sled, dog sleigh',
609
+ 'dome',
610
+ 'doormat, welcome mat',
611
+ 'drilling platform, offshore rig',
612
+ 'drum, membranophone, tympan',
613
+ 'drumstick',
614
+ 'dumbbell',
615
+ 'Dutch oven',
616
+ 'electric fan, blower',
617
+ 'electric guitar',
618
+ 'electric locomotive',
619
+ 'entertainment center',
620
+ 'envelope',
621
+ 'espresso maker',
622
+ 'face powder',
623
+ 'feather boa, boa',
624
+ 'file, file cabinet, filing cabinet',
625
+ 'fireboat',
626
+ 'fire engine, fire truck',
627
+ 'fire screen, fireguard',
628
+ 'flagpole, flagstaff',
629
+ 'flute, transverse flute',
630
+ 'folding chair',
631
+ 'football helmet',
632
+ 'forklift',
633
+ 'fountain',
634
+ 'fountain pen',
635
+ 'four-poster',
636
+ 'freight car',
637
+ 'French horn, horn',
638
+ 'frying pan, frypan, skillet',
639
+ 'fur coat',
640
+ 'garbage truck, dustcart',
641
+ 'gasmask, respirator, gas helmet',
642
+ 'gas pump, gasoline pump, petrol pump, island dispenser',
643
+ 'goblet',
644
+ 'go-kart',
645
+ 'golf ball',
646
+ 'golfcart, golf cart',
647
+ 'gondola',
648
+ 'gong, tam-tam',
649
+ 'gown',
650
+ 'grand piano, grand',
651
+ 'greenhouse, nursery, glasshouse',
652
+ 'grille, radiator grille',
653
+ 'grocery store, grocery, food market, market',
654
+ 'guillotine',
655
+ 'hair slide',
656
+ 'hair spray',
657
+ 'half track',
658
+ 'hammer',
659
+ 'hamper',
660
+ 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
661
+ 'hand-held computer, hand-held microcomputer',
662
+ 'handkerchief, hankie, hanky, hankey',
663
+ 'hard disc, hard disk, fixed disk',
664
+ 'harmonica, mouth organ, harp, mouth harp',
665
+ 'harp',
666
+ 'harvester, reaper',
667
+ 'hatchet',
668
+ 'holster',
669
+ 'home theater, home theatre',
670
+ 'honeycomb',
671
+ 'hook, claw',
672
+ 'hoopskirt, crinoline',
673
+ 'horizontal bar, high bar',
674
+ 'horse cart, horse-cart',
675
+ 'hourglass',
676
+ 'iPod',
677
+ 'iron, smoothing iron',
678
+ "jack-o'-lantern",
679
+ 'jean, blue jean, denim',
680
+ 'jeep, landrover',
681
+ 'jersey, T-shirt, tee shirt',
682
+ 'jigsaw puzzle',
683
+ 'jinrikisha, ricksha, rickshaw',
684
+ 'joystick',
685
+ 'kimono',
686
+ 'knee pad',
687
+ 'knot',
688
+ 'lab coat, laboratory coat',
689
+ 'ladle',
690
+ 'lampshade, lamp shade',
691
+ 'laptop, laptop computer',
692
+ 'lawn mower, mower',
693
+ 'lens cap, lens cover',
694
+ 'letter opener, paper knife, paperknife',
695
+ 'library',
696
+ 'lifeboat',
697
+ 'lighter, light, igniter, ignitor',
698
+ 'limousine, limo',
699
+ 'liner, ocean liner',
700
+ 'lipstick, lip rouge',
701
+ 'Loafer',
702
+ 'lotion',
703
+ 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system', # noqa: E501
704
+ "loupe, jeweler's loupe",
705
+ 'lumbermill, sawmill',
706
+ 'magnetic compass',
707
+ 'mailbag, postbag',
708
+ 'mailbox, letter box',
709
+ 'maillot',
710
+ 'maillot, tank suit',
711
+ 'manhole cover',
712
+ 'maraca',
713
+ 'marimba, xylophone',
714
+ 'mask',
715
+ 'matchstick',
716
+ 'maypole',
717
+ 'maze, labyrinth',
718
+ 'measuring cup',
719
+ 'medicine chest, medicine cabinet',
720
+ 'megalith, megalithic structure',
721
+ 'microphone, mike',
722
+ 'microwave, microwave oven',
723
+ 'military uniform',
724
+ 'milk can',
725
+ 'minibus',
726
+ 'miniskirt, mini',
727
+ 'minivan',
728
+ 'missile',
729
+ 'mitten',
730
+ 'mixing bowl',
731
+ 'mobile home, manufactured home',
732
+ 'Model T',
733
+ 'modem',
734
+ 'monastery',
735
+ 'monitor',
736
+ 'moped',
737
+ 'mortar',
738
+ 'mortarboard',
739
+ 'mosque',
740
+ 'mosquito net',
741
+ 'motor scooter, scooter',
742
+ 'mountain bike, all-terrain bike, off-roader',
743
+ 'mountain tent',
744
+ 'mouse, computer mouse',
745
+ 'mousetrap',
746
+ 'moving van',
747
+ 'muzzle',
748
+ 'nail',
749
+ 'neck brace',
750
+ 'necklace',
751
+ 'nipple',
752
+ 'notebook, notebook computer',
753
+ 'obelisk',
754
+ 'oboe, hautboy, hautbois',
755
+ 'ocarina, sweet potato',
756
+ 'odometer, hodometer, mileometer, milometer',
757
+ 'oil filter',
758
+ 'organ, pipe organ',
759
+ 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
760
+ 'overskirt',
761
+ 'oxcart',
762
+ 'oxygen mask',
763
+ 'packet',
764
+ 'paddle, boat paddle',
765
+ 'paddlewheel, paddle wheel',
766
+ 'padlock',
767
+ 'paintbrush',
768
+ "pajama, pyjama, pj's, jammies",
769
+ 'palace',
770
+ 'panpipe, pandean pipe, syrinx',
771
+ 'paper towel',
772
+ 'parachute, chute',
773
+ 'parallel bars, bars',
774
+ 'park bench',
775
+ 'parking meter',
776
+ 'passenger car, coach, carriage',
777
+ 'patio, terrace',
778
+ 'pay-phone, pay-station',
779
+ 'pedestal, plinth, footstall',
780
+ 'pencil box, pencil case',
781
+ 'pencil sharpener',
782
+ 'perfume, essence',
783
+ 'Petri dish',
784
+ 'photocopier',
785
+ 'pick, plectrum, plectron',
786
+ 'pickelhaube',
787
+ 'picket fence, paling',
788
+ 'pickup, pickup truck',
789
+ 'pier',
790
+ 'piggy bank, penny bank',
791
+ 'pill bottle',
792
+ 'pillow',
793
+ 'ping-pong ball',
794
+ 'pinwheel',
795
+ 'pirate, pirate ship',
796
+ 'pitcher, ewer',
797
+ "plane, carpenter's plane, woodworking plane",
798
+ 'planetarium',
799
+ 'plastic bag',
800
+ 'plate rack',
801
+ 'plow, plough',
802
+ "plunger, plumber's helper",
803
+ 'Polaroid camera, Polaroid Land camera',
804
+ 'pole',
805
+ 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria', # noqa: E501
806
+ 'poncho',
807
+ 'pool table, billiard table, snooker table',
808
+ 'pop bottle, soda bottle',
809
+ 'pot, flowerpot',
810
+ "potter's wheel",
811
+ 'power drill',
812
+ 'prayer rug, prayer mat',
813
+ 'printer',
814
+ 'prison, prison house',
815
+ 'projectile, missile',
816
+ 'projector',
817
+ 'puck, hockey puck',
818
+ 'punching bag, punch bag, punching ball, punchball',
819
+ 'purse',
820
+ 'quill, quill pen',
821
+ 'quilt, comforter, comfort, puff',
822
+ 'racer, race car, racing car',
823
+ 'racket, racquet',
824
+ 'radiator',
825
+ 'radio, wireless',
826
+ 'radio telescope, radio reflector',
827
+ 'rain barrel',
828
+ 'recreational vehicle, RV, R.V.',
829
+ 'reel',
830
+ 'reflex camera',
831
+ 'refrigerator, icebox',
832
+ 'remote control, remote',
833
+ 'restaurant, eating house, eating place, eatery',
834
+ 'revolver, six-gun, six-shooter',
835
+ 'rifle',
836
+ 'rocking chair, rocker',
837
+ 'rotisserie',
838
+ 'rubber eraser, rubber, pencil eraser',
839
+ 'rugby ball',
840
+ 'rule, ruler',
841
+ 'running shoe',
842
+ 'safe',
843
+ 'safety pin',
844
+ 'saltshaker, salt shaker',
845
+ 'sandal',
846
+ 'sarong',
847
+ 'sax, saxophone',
848
+ 'scabbard',
849
+ 'scale, weighing machine',
850
+ 'school bus',
851
+ 'schooner',
852
+ 'scoreboard',
853
+ 'screen, CRT screen',
854
+ 'screw',
855
+ 'screwdriver',
856
+ 'seat belt, seatbelt',
857
+ 'sewing machine',
858
+ 'shield, buckler',
859
+ 'shoe shop, shoe-shop, shoe store',
860
+ 'shoji',
861
+ 'shopping basket',
862
+ 'shopping cart',
863
+ 'shovel',
864
+ 'shower cap',
865
+ 'shower curtain',
866
+ 'ski',
867
+ 'ski mask',
868
+ 'sleeping bag',
869
+ 'slide rule, slipstick',
870
+ 'sliding door',
871
+ 'slot, one-armed bandit',
872
+ 'snorkel',
873
+ 'snowmobile',
874
+ 'snowplow, snowplough',
875
+ 'soap dispenser',
876
+ 'soccer ball',
877
+ 'sock',
878
+ 'solar dish, solar collector, solar furnace',
879
+ 'sombrero',
880
+ 'soup bowl',
881
+ 'space bar',
882
+ 'space heater',
883
+ 'space shuttle',
884
+ 'spatula',
885
+ 'speedboat',
886
+ "spider web, spider's web",
887
+ 'spindle',
888
+ 'sports car, sport car',
889
+ 'spotlight, spot',
890
+ 'stage',
891
+ 'steam locomotive',
892
+ 'steel arch bridge',
893
+ 'steel drum',
894
+ 'stethoscope',
895
+ 'stole',
896
+ 'stone wall',
897
+ 'stopwatch, stop watch',
898
+ 'stove',
899
+ 'strainer',
900
+ 'streetcar, tram, tramcar, trolley, trolley car',
901
+ 'stretcher',
902
+ 'studio couch, day bed',
903
+ 'stupa, tope',
904
+ 'submarine, pigboat, sub, U-boat',
905
+ 'suit, suit of clothes',
906
+ 'sundial',
907
+ 'sunglass',
908
+ 'sunglasses, dark glasses, shades',
909
+ 'sunscreen, sunblock, sun blocker',
910
+ 'suspension bridge',
911
+ 'swab, swob, mop',
912
+ 'sweatshirt',
913
+ 'swimming trunks, bathing trunks',
914
+ 'swing',
915
+ 'switch, electric switch, electrical switch',
916
+ 'syringe',
917
+ 'table lamp',
918
+ 'tank, army tank, armored combat vehicle, armoured combat vehicle',
919
+ 'tape player',
920
+ 'teapot',
921
+ 'teddy, teddy bear',
922
+ 'television, television system',
923
+ 'tennis ball',
924
+ 'thatch, thatched roof',
925
+ 'theater curtain, theatre curtain',
926
+ 'thimble',
927
+ 'thresher, thrasher, threshing machine',
928
+ 'throne',
929
+ 'tile roof',
930
+ 'toaster',
931
+ 'tobacco shop, tobacconist shop, tobacconist',
932
+ 'toilet seat',
933
+ 'torch',
934
+ 'totem pole',
935
+ 'tow truck, tow car, wrecker',
936
+ 'toyshop',
937
+ 'tractor',
938
+ 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi', # noqa: E501
939
+ 'tray',
940
+ 'trench coat',
941
+ 'tricycle, trike, velocipede',
942
+ 'trimaran',
943
+ 'tripod',
944
+ 'triumphal arch',
945
+ 'trolleybus, trolley coach, trackless trolley',
946
+ 'trombone',
947
+ 'tub, vat',
948
+ 'turnstile',
949
+ 'typewriter keyboard',
950
+ 'umbrella',
951
+ 'unicycle, monocycle',
952
+ 'upright, upright piano',
953
+ 'vacuum, vacuum cleaner',
954
+ 'vase',
955
+ 'vault',
956
+ 'velvet',
957
+ 'vending machine',
958
+ 'vestment',
959
+ 'viaduct',
960
+ 'violin, fiddle',
961
+ 'volleyball',
962
+ 'waffle iron',
963
+ 'wall clock',
964
+ 'wallet, billfold, notecase, pocketbook',
965
+ 'wardrobe, closet, press',
966
+ 'warplane, military plane',
967
+ 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
968
+ 'washer, automatic washer, washing machine',
969
+ 'water bottle',
970
+ 'water jug',
971
+ 'water tower',
972
+ 'whiskey jug',
973
+ 'whistle',
974
+ 'wig',
975
+ 'window screen',
976
+ 'window shade',
977
+ 'Windsor tie',
978
+ 'wine bottle',
979
+ 'wing',
980
+ 'wok',
981
+ 'wooden spoon',
982
+ 'wool, woolen, woollen',
983
+ 'worm fence, snake fence, snake-rail fence, Virginia fence',
984
+ 'wreck',
985
+ 'yawl',
986
+ 'yurt',
987
+ 'web site, website, internet site, site',
988
+ 'comic book',
989
+ 'crossword puzzle, crossword',
990
+ 'street sign',
991
+ 'traffic light, traffic signal, stoplight',
992
+ 'book jacket, dust cover, dust jacket, dust wrapper',
993
+ 'menu',
994
+ 'plate',
995
+ 'guacamole',
996
+ 'consomme',
997
+ 'hot pot, hotpot',
998
+ 'trifle',
999
+ 'ice cream, icecream',
1000
+ 'ice lolly, lolly, lollipop, popsicle',
1001
+ 'French loaf',
1002
+ 'bagel, beigel',
1003
+ 'pretzel',
1004
+ 'cheeseburger',
1005
+ 'hotdog, hot dog, red hot',
1006
+ 'mashed potato',
1007
+ 'head cabbage',
1008
+ 'broccoli',
1009
+ 'cauliflower',
1010
+ 'zucchini, courgette',
1011
+ 'spaghetti squash',
1012
+ 'acorn squash',
1013
+ 'butternut squash',
1014
+ 'cucumber, cuke',
1015
+ 'artichoke, globe artichoke',
1016
+ 'bell pepper',
1017
+ 'cardoon',
1018
+ 'mushroom',
1019
+ 'Granny Smith',
1020
+ 'strawberry',
1021
+ 'orange',
1022
+ 'lemon',
1023
+ 'fig',
1024
+ 'pineapple, ananas',
1025
+ 'banana',
1026
+ 'jackfruit, jak, jack',
1027
+ 'custard apple',
1028
+ 'pomegranate',
1029
+ 'hay',
1030
+ 'carbonara',
1031
+ 'chocolate sauce, chocolate syrup',
1032
+ 'dough',
1033
+ 'meat loaf, meatloaf',
1034
+ 'pizza, pizza pie',
1035
+ 'potpie',
1036
+ 'burrito',
1037
+ 'red wine',
1038
+ 'espresso',
1039
+ 'cup',
1040
+ 'eggnog',
1041
+ 'alp',
1042
+ 'bubble',
1043
+ 'cliff, drop, drop-off',
1044
+ 'coral reef',
1045
+ 'geyser',
1046
+ 'lakeside, lakeshore',
1047
+ 'promontory, headland, head, foreland',
1048
+ 'sandbar, sand bar',
1049
+ 'seashore, coast, seacoast, sea-coast',
1050
+ 'valley, vale',
1051
+ 'volcano',
1052
+ 'ballplayer, baseball player',
1053
+ 'groom, bridegroom',
1054
+ 'scuba diver',
1055
+ 'rapeseed',
1056
+ 'daisy',
1057
+ "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", # noqa: E501
1058
+ 'corn',
1059
+ 'acorn',
1060
+ 'hip, rose hip, rosehip',
1061
+ 'buckeye, horse chestnut, conker',
1062
+ 'coral fungus',
1063
+ 'agaric',
1064
+ 'gyromitra',
1065
+ 'stinkhorn, carrion fungus',
1066
+ 'earthstar',
1067
+ 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa', # noqa: E501
1068
+ 'bolete',
1069
+ 'ear, spike, capitulum',
1070
+ 'toilet tissue, toilet paper, bathroom tissue')
1071
+
1072
+ CIFAR10_CATEGORIES = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
1073
+ 'frog', 'horse', 'ship', 'truck')
1074
+
1075
+ CIFAR100_CATEGORIES = (
1076
+ 'apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle',
1077
+ 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel',
1078
+ 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock',
1079
+ 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
1080
+ 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster',
1081
+ 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion',
1082
+ 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
1083
+ 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree',
1084
+ 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy',
1085
+ 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket',
1086
+ 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail',
1087
+ 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper',
1088
+ 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train',
1089
+ 'trout', 'tulip', 'turtle', 'wardrobe', 'whale', 'willow_tree', 'wolf',
1090
+ 'woman', 'worm')
1091
+
1092
+ MNIST_CATEGORITES = ('0 - zero', '1 - one', '2 - two', '3 - three', '4 - four',
1093
+ '5 - five', '6 - six', '7 - seven', '8 - eight',
1094
+ '9 - nine')
1095
+
1096
+ FASHIONMNIST_CATEGORITES = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress',
1097
+ 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag',
1098
+ 'Ankle boot')
1099
+
1100
+ PLACES205_CATEGORIES = (
1101
+ 'abbey', 'airport_terminal', 'alley', 'amphitheater', 'amusement_park',
1102
+ 'aquarium', 'aqueduct', 'arch', 'art_gallery', 'art_studio',
1103
+ 'assembly_line', 'attic', 'auditorium', 'apartment_building/outdoor',
1104
+ 'badlands', 'ballroom', 'bamboo_forest', 'banquet_hall', 'bar',
1105
+ 'baseball_field', 'basement', 'basilica', 'bayou', 'beauty_salon',
1106
+ 'bedroom', 'boardwalk', 'boat_deck', 'bookstore', 'botanical_garden',
1107
+ 'bowling_alley', 'boxing_ring', 'bridge', 'building_facade',
1108
+ 'bus_interior', 'butchers_shop', 'butte', 'bakery/shop', 'cafeteria',
1109
+ 'campsite', 'candy_store', 'canyon', 'castle', 'cemetery', 'chalet',
1110
+ 'classroom', 'closet', 'clothing_store', 'coast', 'cockpit', 'coffee_shop',
1111
+ 'conference_center', 'conference_room', 'construction_site', 'corn_field',
1112
+ 'corridor', 'cottage_garden', 'courthouse', 'courtyard', 'creek',
1113
+ 'crevasse', 'crosswalk', 'cathedral/outdoor', 'church/outdoor', 'dam',
1114
+ 'dining_room', 'dock', 'dorm_room', 'driveway', 'desert/sand',
1115
+ 'desert/vegetation', 'dinette/home', 'doorway/outdoor', 'engine_room',
1116
+ 'excavation', 'fairway', 'fire_escape', 'fire_station', 'food_court',
1117
+ 'forest_path', 'forest_road', 'formal_garden', 'fountain',
1118
+ 'field/cultivated', 'field/wild', 'galley', 'game_room', 'garbage_dump',
1119
+ 'gas_station', 'gift_shop', 'golf_course', 'harbor', 'herb_garden',
1120
+ 'highway', 'home_office', 'hospital', 'hospital_room', 'hot_spring',
1121
+ 'hotel_room', 'hotel/outdoor', 'ice_cream_parlor', 'iceberg', 'igloo',
1122
+ 'islet', 'ice_skating_rink/outdoor', 'inn/outdoor', 'jail_cell', 'kasbah',
1123
+ 'kindergarden_classroom', 'kitchen', 'kitchenette', 'laundromat',
1124
+ 'lighthouse', 'living_room', 'lobby', 'locker_room', 'mansion', 'marsh',
1125
+ 'martial_arts_gym', 'mausoleum', 'medina', 'motel', 'mountain',
1126
+ 'mountain_snowy', 'music_studio', 'market/outdoor', 'monastery/outdoor',
1127
+ 'museum/indoor', 'nursery', 'ocean', 'office', 'office_building',
1128
+ 'orchard', 'pagoda', 'palace', 'pantry', 'parking_lot', 'parlor',
1129
+ 'pasture', 'patio', 'pavilion', 'phone_booth', 'picnic_area', 'playground',
1130
+ 'plaza', 'pond', 'pulpit', 'racecourse', 'raft', 'railroad_track',
1131
+ 'rainforest', 'reception', 'residential_neighborhood', 'restaurant',
1132
+ 'restaurant_kitchen', 'restaurant_patio', 'rice_paddy', 'river',
1133
+ 'rock_arch', 'rope_bridge', 'ruin', 'runway', 'sandbar', 'schoolhouse',
1134
+ 'sea_cliff', 'shed', 'shoe_shop', 'shopfront', 'shower', 'ski_resort',
1135
+ 'ski_slope', 'sky', 'skyscraper', 'slum', 'snowfield', 'staircase',
1136
+ 'supermarket', 'swamp', 'stadium/baseball', 'stadium/football',
1137
+ 'stage/indoor', 'subway_station/platform', 'swimming_pool/outdoor',
1138
+ 'television_studio', 'topiary_garden', 'tower', 'train_railway',
1139
+ 'tree_farm', 'trench', 'temple/east_asia', 'temple/south_asia',
1140
+ 'track/outdoor', 'train_station/platform', 'underwater/coral_reef',
1141
+ 'valley', 'vegetable_garden', 'veranda', 'viaduct', 'volcano',
1142
+ 'waiting_room', 'water_tower', 'watering_hole', 'wheat_field', 'wind_farm',
1143
+ 'windmill', 'yard')
1144
+
1145
+ OxfordIIITPet_CATEGORIES = (
1146
+ 'Abyssinian', 'american_bulldog', 'american_pit_bull_terrier',
1147
+ 'basset_hound', 'beagle', 'Bengal', 'Birman', 'Bombay', 'boxer',
1148
+ 'British_Shorthair', 'chihuahua', 'Egyptian_Mau', 'english_cocker_spaniel',
1149
+ 'english_setter', 'german_shorthaired', 'great_pyrenees', 'havanese',
1150
+ 'japanese_chin', 'keeshond', 'leonberger', 'Maine_Coon',
1151
+ 'miniature_pinscher', 'newfoundland', 'Persian', 'pomeranian', 'pug',
1152
+ 'Ragdoll', 'Russian_Blue', 'saint_bernard', 'samoyed', 'scottish_terrier',
1153
+ 'shiba_inu', 'Siamese', 'Sphynx', 'staffordshire_bull_terrier',
1154
+ 'wheaten_terrier', 'yorkshire_terrier')
1155
+
1156
+ DTD_CATEGORIES = ('banded', 'blotchy', 'braided', 'bubbly', 'bumpy',
1157
+ 'chequered', 'cobwebbed', 'cracked', 'crosshatched',
1158
+ 'crystalline', 'dotted', 'fibrous', 'flecked', 'freckled',
1159
+ 'frilly', 'gauzy', 'grid', 'grooved', 'honeycombed',
1160
+ 'interlaced', 'knitted', 'lacelike', 'lined', 'marbled',
1161
+ 'matted', 'meshed', 'paisley', 'perforated', 'pitted',
1162
+ 'pleated', 'polka-dotted', 'porous', 'potholed', 'scaly',
1163
+ 'smeared', 'spiralled', 'sprinkled', 'stained', 'stratified',
1164
+ 'striped', 'studded', 'swirly', 'veined', 'waffled', 'woven',
1165
+ 'wrinkled', 'zigzagged')
1166
+
1167
+ FGVCAIRCRAFT_CATEGORIES = (
1168
+ '707-320', '727-200', '737-200', '737-300', '737-400', '737-500',
1169
+ '737-600', '737-700', '737-800', '737-900', '747-100', '747-200',
1170
+ '747-300', '747-400', '757-200', '757-300', '767-200', '767-300',
1171
+ '767-400', '777-200', '777-300', 'A300B4', 'A310', 'A318', 'A319', 'A320',
1172
+ 'A321', 'A330-200', 'A330-300', 'A340-200', 'A340-300', 'A340-500',
1173
+ 'A340-600', 'A380', 'ATR-42', 'ATR-72', 'An-12', 'BAE 146-200',
1174
+ 'BAE 146-300', 'BAE-125', 'Beechcraft 1900', 'Boeing 717', 'C-130', 'C-47',
1175
+ 'CRJ-200', 'CRJ-700', 'CRJ-900', 'Cessna 172', 'Cessna 208', 'Cessna 525',
1176
+ 'Cessna 560', 'Challenger 600', 'DC-10', 'DC-3', 'DC-6', 'DC-8', 'DC-9-30',
1177
+ 'DH-82', 'DHC-1', 'DHC-6', 'DHC-8-100', 'DHC-8-300', 'DR-400',
1178
+ 'Dornier 328', 'E-170', 'E-190', 'E-195', 'EMB-120', 'ERJ 135', 'ERJ 145',
1179
+ 'Embraer Legacy 600', 'Eurofighter Typhoon', 'F-16A/B', 'F/A-18',
1180
+ 'Falcon 2000', 'Falcon 900', 'Fokker 100', 'Fokker 50', 'Fokker 70',
1181
+ 'Global Express', 'Gulfstream IV', 'Gulfstream V', 'Hawk T1', 'Il-76',
1182
+ 'L-1011', 'MD-11', 'MD-80', 'MD-87', 'MD-90', 'Metroliner', 'Model B200',
1183
+ 'PA-28', 'SR-20', 'Saab 2000', 'Saab 340', 'Spitfire', 'Tornado', 'Tu-134',
1184
+ 'Tu-154', 'Yak-42')
1185
+
1186
+ STANFORDCARS_CATEGORIES = (
1187
+ 'AM General Hummer SUV 2000', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012',
1188
+ 'Acura TL Type-S 2008', 'Acura TSX Sedan 2012',
1189
+ 'Acura Integra Type R 2001', 'Acura ZDX Hatchback 2012',
1190
+ 'Aston Martin V8 Vantage Convertible 2012',
1191
+ 'Aston Martin V8 Vantage Coupe 2012',
1192
+ 'Aston Martin Virage Convertible 2012', 'Aston Martin Virage Coupe 2012',
1193
+ 'Audi RS 4 Convertible 2008', 'Audi A5 Coupe 2012', 'Audi TTS Coupe 2012',
1194
+ 'Audi R8 Coupe 2012', 'Audi V8 Sedan 1994', 'Audi 100 Sedan 1994',
1195
+ 'Audi 100 Wagon 1994', 'Audi TT Hatchback 2011', 'Audi S6 Sedan 2011',
1196
+ 'Audi S5 Convertible 2012', 'Audi S5 Coupe 2012', 'Audi S4 Sedan 2012',
1197
+ 'Audi S4 Sedan 2007', 'Audi TT RS Coupe 2012',
1198
+ 'BMW ActiveHybrid 5 Sedan 2012', 'BMW 1 Series Convertible 2012',
1199
+ 'BMW 1 Series Coupe 2012', 'BMW 3 Series Sedan 2012',
1200
+ 'BMW 3 Series Wagon 2012', 'BMW 6 Series Convertible 2007',
1201
+ 'BMW X5 SUV 2007', 'BMW X6 SUV 2012', 'BMW M3 Coupe 2012',
1202
+ 'BMW M5 Sedan 2010', 'BMW M6 Convertible 2010', 'BMW X3 SUV 2012',
1203
+ 'BMW Z4 Convertible 2012',
1204
+ 'Bentley Continental Supersports Conv. Convertible 2012',
1205
+ 'Bentley Arnage Sedan 2009', 'Bentley Mulsanne Sedan 2011',
1206
+ 'Bentley Continental GT Coupe 2012', 'Bentley Continental GT Coupe 2007',
1207
+ 'Bentley Continental Flying Spur Sedan 2007',
1208
+ 'Bugatti Veyron 16.4 Convertible 2009', 'Bugatti Veyron 16.4 Coupe 2009',
1209
+ 'Buick Regal GS 2012', 'Buick Rainier SUV 2007', 'Buick Verano Sedan 2012',
1210
+ 'Buick Enclave SUV 2012', 'Cadillac CTS-V Sedan 2012',
1211
+ 'Cadillac SRX SUV 2012', 'Cadillac Escalade EXT Crew Cab 2007',
1212
+ 'Chevrolet Silverado 1500 Hybrid Crew Cab 2012',
1213
+ 'Chevrolet Corvette Convertible 2012', 'Chevrolet Corvette ZR1 2012',
1214
+ 'Chevrolet Corvette Ron Fellows Edition Z06 2007',
1215
+ 'Chevrolet Traverse SUV 2012', 'Chevrolet Camaro Convertible 2012',
1216
+ 'Chevrolet HHR SS 2010', 'Chevrolet Impala Sedan 2007',
1217
+ 'Chevrolet Tahoe Hybrid SUV 2012', 'Chevrolet Sonic Sedan 2012',
1218
+ 'Chevrolet Express Cargo Van 2007', 'Chevrolet Avalanche Crew Cab 2012',
1219
+ 'Chevrolet Cobalt SS 2010', 'Chevrolet Malibu Hybrid Sedan 2010',
1220
+ 'Chevrolet TrailBlazer SS 2009',
1221
+ 'Chevrolet Silverado 2500HD Regular Cab 2012',
1222
+ 'Chevrolet Silverado 1500 Classic Extended Cab 2007',
1223
+ 'Chevrolet Express Van 2007', 'Chevrolet Monte Carlo Coupe 2007',
1224
+ 'Chevrolet Malibu Sedan 2007',
1225
+ 'Chevrolet Silverado 1500 Extended Cab 2012',
1226
+ 'Chevrolet Silverado 1500 Regular Cab 2012', 'Chrysler Aspen SUV 2009',
1227
+ 'Chrysler Sebring Convertible 2010',
1228
+ 'Chrysler Town and Country Minivan 2012', 'Chrysler 300 SRT-8 2010',
1229
+ 'Chrysler Crossfire Convertible 2008',
1230
+ 'Chrysler PT Cruiser Convertible 2008', 'Daewoo Nubira Wagon 2002',
1231
+ 'Dodge Caliber Wagon 2012', 'Dodge Caliber Wagon 2007',
1232
+ 'Dodge Caravan Minivan 1997', 'Dodge Ram Pickup 3500 Crew Cab 2010',
1233
+ 'Dodge Ram Pickup 3500 Quad Cab 2009', 'Dodge Sprinter Cargo Van 2009',
1234
+ 'Dodge Journey SUV 2012', 'Dodge Dakota Crew Cab 2010',
1235
+ 'Dodge Dakota Club Cab 2007', 'Dodge Magnum Wagon 2008',
1236
+ 'Dodge Challenger SRT8 2011', 'Dodge Durango SUV 2012',
1237
+ 'Dodge Durango SUV 2007', 'Dodge Charger Sedan 2012',
1238
+ 'Dodge Charger SRT-8 2009', 'Eagle Talon Hatchback 1998',
1239
+ 'FIAT 500 Abarth 2012', 'FIAT 500 Convertible 2012',
1240
+ 'Ferrari FF Coupe 2012', 'Ferrari California Convertible 2012',
1241
+ 'Ferrari 458 Italia Convertible 2012', 'Ferrari 458 Italia Coupe 2012',
1242
+ 'Fisker Karma Sedan 2012', 'Ford F-450 Super Duty Crew Cab 2012',
1243
+ 'Ford Mustang Convertible 2007', 'Ford Freestar Minivan 2007',
1244
+ 'Ford Expedition EL SUV 2009', 'Ford Edge SUV 2012',
1245
+ 'Ford Ranger SuperCab 2011', 'Ford GT Coupe 2006',
1246
+ 'Ford F-150 Regular Cab 2012', 'Ford F-150 Regular Cab 2007',
1247
+ 'Ford Focus Sedan 2007', 'Ford E-Series Wagon Van 2012',
1248
+ 'Ford Fiesta Sedan 2012', 'GMC Terrain SUV 2012', 'GMC Savana Van 2012',
1249
+ 'GMC Yukon Hybrid SUV 2012', 'GMC Acadia SUV 2012',
1250
+ 'GMC Canyon Extended Cab 2012', 'Geo Metro Convertible 1993',
1251
+ 'HUMMER H3T Crew Cab 2010', 'HUMMER H2 SUT Crew Cab 2009',
1252
+ 'Honda Odyssey Minivan 2012', 'Honda Odyssey Minivan 2007',
1253
+ 'Honda Accord Coupe 2012', 'Honda Accord Sedan 2012',
1254
+ 'Hyundai Veloster Hatchback 2012', 'Hyundai Santa Fe SUV 2012',
1255
+ 'Hyundai Tucson SUV 2012', 'Hyundai Veracruz SUV 2012',
1256
+ 'Hyundai Sonata Hybrid Sedan 2012', 'Hyundai Elantra Sedan 2007',
1257
+ 'Hyundai Accent Sedan 2012', 'Hyundai Genesis Sedan 2012',
1258
+ 'Hyundai Sonata Sedan 2012', 'Hyundai Elantra Touring Hatchback 2012',
1259
+ 'Hyundai Azera Sedan 2012', 'Infiniti G Coupe IPL 2012',
1260
+ 'Infiniti QX56 SUV 2011', 'Isuzu Ascender SUV 2008', 'Jaguar XK XKR 2012',
1261
+ 'Jeep Patriot SUV 2012', 'Jeep Wrangler SUV 2012', 'Jeep Liberty SUV 2012',
1262
+ 'Jeep Grand Cherokee SUV 2012', 'Jeep Compass SUV 2012',
1263
+ 'Lamborghini Reventon Coupe 2008', 'Lamborghini Aventador Coupe 2012',
1264
+ 'Lamborghini Gallardo LP 570-4 Superleggera 2012',
1265
+ 'Lamborghini Diablo Coupe 2001', 'Land Rover Range Rover SUV 2012',
1266
+ 'Land Rover LR2 SUV 2012', 'Lincoln Town Car Sedan 2011',
1267
+ 'MINI Cooper Roadster Convertible 2012',
1268
+ 'Maybach Landaulet Convertible 2012', 'Mazda Tribute SUV 2011',
1269
+ 'McLaren MP4-12C Coupe 2012', 'Mercedes-Benz 300-Class Convertible 1993',
1270
+ 'Mercedes-Benz C-Class Sedan 2012', 'Mercedes-Benz SL-Class Coupe 2009',
1271
+ 'Mercedes-Benz E-Class Sedan 2012', 'Mercedes-Benz S-Class Sedan 2012',
1272
+ 'Mercedes-Benz Sprinter Van 2012', 'Mitsubishi Lancer Sedan 2012',
1273
+ 'Nissan Leaf Hatchback 2012', 'Nissan NV Passenger Van 2012',
1274
+ 'Nissan Juke Hatchback 2012', 'Nissan 240SX Coupe 1998',
1275
+ 'Plymouth Neon Coupe 1999', 'Porsche Panamera Sedan 2012',
1276
+ 'Ram C/V Cargo Van Minivan 2012',
1277
+ 'Rolls-Royce Phantom Drophead Coupe Convertible 2012',
1278
+ 'Rolls-Royce Ghost Sedan 2012', 'Rolls-Royce Phantom Sedan 2012',
1279
+ 'Scion xD Hatchback 2012', 'Spyker C8 Convertible 2009',
1280
+ 'Spyker C8 Coupe 2009', 'Suzuki Aerio Sedan 2007',
1281
+ 'Suzuki Kizashi Sedan 2012', 'Suzuki SX4 Hatchback 2012',
1282
+ 'Suzuki SX4 Sedan 2012', 'Tesla Model S Sedan 2012',
1283
+ 'Toyota Sequoia SUV 2012', 'Toyota Camry Sedan 2012',
1284
+ 'Toyota Corolla Sedan 2012', 'Toyota 4Runner SUV 2012',
1285
+ 'Volkswagen Golf Hatchback 2012', 'Volkswagen Golf Hatchback 1991',
1286
+ 'Volkswagen Beetle Hatchback 2012', 'Volvo C30 Hatchback 2012',
1287
+ 'Volvo 240 Sedan 1993', 'Volvo XC90 SUV 2007',
1288
+ 'smart fortwo Convertible 2012')
1289
+
1290
+ SUN397_CATEGORIES = (
1291
+ 'abbey', 'airplane_cabin', 'airport_terminal', 'alley', 'amphitheater',
1292
+ 'amusement_arcade', 'amusement_park', 'anechoic_chamber',
1293
+ 'apartment_building_outdoor', 'apse_indoor', 'aquarium', 'aqueduct',
1294
+ 'arch', 'archive', 'arrival_gate_outdoor', 'art_gallery', 'art_school',
1295
+ 'art_studio', 'assembly_line', 'athletic_field_outdoor', 'atrium_public',
1296
+ 'attic', 'auditorium', 'auto_factory', 'badlands',
1297
+ 'badminton_court_indoor', 'baggage_claim', 'bakery_shop',
1298
+ 'balcony_exterior', 'balcony_interior', 'ball_pit', 'ballroom',
1299
+ 'bamboo_forest', 'banquet_hall', 'bar', 'barn', 'barndoor',
1300
+ 'baseball_field', 'basement', 'basilica', 'basketball_court_outdoor',
1301
+ 'bathroom', 'batters_box', 'bayou', 'bazaar_indoor', 'bazaar_outdoor',
1302
+ 'beach', 'beauty_salon', 'bedroom', 'berth', 'biology_laboratory',
1303
+ 'bistro_indoor', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore',
1304
+ 'booth_indoor', 'botanical_garden', 'bow_window_indoor',
1305
+ 'bow_window_outdoor', 'bowling_alley', 'boxing_ring', 'brewery_indoor',
1306
+ 'bridge', 'building_facade', 'bullring', 'burial_chamber', 'bus_interior',
1307
+ 'butchers_shop', 'butte', 'cabin_outdoor', 'cafeteria', 'campsite',
1308
+ 'campus', 'canal_natural', 'canal_urban', 'candy_store', 'canyon',
1309
+ 'car_interior_backseat', 'car_interior_frontseat', 'carrousel',
1310
+ 'casino_indoor', 'castle', 'catacomb', 'cathedral_indoor',
1311
+ 'cathedral_outdoor', 'cavern_indoor', 'cemetery', 'chalet',
1312
+ 'cheese_factory', 'chemistry_lab', 'chicken_coop_indoor',
1313
+ 'chicken_coop_outdoor', 'childs_room', 'church_indoor', 'church_outdoor',
1314
+ 'classroom', 'clean_room', 'cliff', 'cloister_indoor', 'closet',
1315
+ 'clothing_store', 'coast', 'cockpit', 'coffee_shop', 'computer_room',
1316
+ 'conference_center', 'conference_room', 'construction_site',
1317
+ 'control_room', 'control_tower_outdoor', 'corn_field', 'corral',
1318
+ 'corridor', 'cottage_garden', 'courthouse', 'courtroom', 'courtyard',
1319
+ 'covered_bridge_exterior', 'creek', 'crevasse', 'crosswalk',
1320
+ 'cubicle_office', 'dam', 'delicatessen', 'dentists_office', 'desert_sand',
1321
+ 'desert_vegetation', 'diner_indoor', 'diner_outdoor', 'dinette_home',
1322
+ 'dinette_vehicle', 'dining_car', 'dining_room', 'discotheque', 'dock',
1323
+ 'doorway_outdoor', 'dorm_room', 'driveway', 'driving_range_outdoor',
1324
+ 'drugstore', 'electrical_substation', 'elevator_door', 'elevator_interior',
1325
+ 'elevator_shaft', 'engine_room', 'escalator_indoor', 'excavation',
1326
+ 'factory_indoor', 'fairway', 'fastfood_restaurant', 'field_cultivated',
1327
+ 'field_wild', 'fire_escape', 'fire_station', 'firing_range_indoor',
1328
+ 'fishpond', 'florist_shop_indoor', 'food_court', 'forest_broadleaf',
1329
+ 'forest_needleleaf', 'forest_path', 'forest_road', 'formal_garden',
1330
+ 'fountain', 'galley', 'game_room', 'garage_indoor', 'garbage_dump',
1331
+ 'gas_station', 'gazebo_exterior', 'general_store_indoor',
1332
+ 'general_store_outdoor', 'gift_shop', 'golf_course', 'greenhouse_indoor',
1333
+ 'greenhouse_outdoor', 'gymnasium_indoor', 'hangar_indoor',
1334
+ 'hangar_outdoor', 'harbor', 'hayfield', 'heliport', 'herb_garden',
1335
+ 'highway', 'hill', 'home_office', 'hospital', 'hospital_room',
1336
+ 'hot_spring', 'hot_tub_outdoor', 'hotel_outdoor', 'hotel_room', 'house',
1337
+ 'hunting_lodge_outdoor', 'ice_cream_parlor', 'ice_floe', 'ice_shelf',
1338
+ 'ice_skating_rink_indoor', 'ice_skating_rink_outdoor', 'iceberg', 'igloo',
1339
+ 'industrial_area', 'inn_outdoor', 'islet', 'jacuzzi_indoor', 'jail_indoor',
1340
+ 'jail_cell', 'jewelry_shop', 'kasbah', 'kennel_indoor', 'kennel_outdoor',
1341
+ 'kindergarden_classroom', 'kitchen', 'kitchenette', 'labyrinth_outdoor',
1342
+ 'lake_natural', 'landfill', 'landing_deck', 'laundromat', 'lecture_room',
1343
+ 'library_indoor', 'library_outdoor', 'lido_deck_outdoor', 'lift_bridge',
1344
+ 'lighthouse', 'limousine_interior', 'living_room', 'lobby', 'lock_chamber',
1345
+ 'locker_room', 'mansion', 'manufactured_home', 'market_indoor',
1346
+ 'market_outdoor', 'marsh', 'martial_arts_gym', 'mausoleum', 'medina',
1347
+ 'moat_water', 'monastery_outdoor', 'mosque_indoor', 'mosque_outdoor',
1348
+ 'motel', 'mountain', 'mountain_snowy', 'movie_theater_indoor',
1349
+ 'museum_indoor', 'music_store', 'music_studio',
1350
+ 'nuclear_power_plant_outdoor', 'nursery', 'oast_house',
1351
+ 'observatory_outdoor', 'ocean', 'office', 'office_building',
1352
+ 'oil_refinery_outdoor', 'oilrig', 'operating_room', 'orchard',
1353
+ 'outhouse_outdoor', 'pagoda', 'palace', 'pantry', 'park',
1354
+ 'parking_garage_indoor', 'parking_garage_outdoor', 'parking_lot', 'parlor',
1355
+ 'pasture', 'patio', 'pavilion', 'pharmacy', 'phone_booth',
1356
+ 'physics_laboratory', 'picnic_area', 'pilothouse_indoor',
1357
+ 'planetarium_outdoor', 'playground', 'playroom', 'plaza', 'podium_indoor',
1358
+ 'podium_outdoor', 'pond', 'poolroom_establishment', 'poolroom_home',
1359
+ 'power_plant_outdoor', 'promenade_deck', 'pub_indoor', 'pulpit',
1360
+ 'putting_green', 'racecourse', 'raceway', 'raft', 'railroad_track',
1361
+ 'rainforest', 'reception', 'recreation_room', 'residential_neighborhood',
1362
+ 'restaurant', 'restaurant_kitchen', 'restaurant_patio', 'rice_paddy',
1363
+ 'riding_arena', 'river', 'rock_arch', 'rope_bridge', 'ruin', 'runway',
1364
+ 'sandbar', 'sandbox', 'sauna', 'schoolhouse', 'sea_cliff', 'server_room',
1365
+ 'shed', 'shoe_shop', 'shopfront', 'shopping_mall_indoor', 'shower',
1366
+ 'skatepark', 'ski_lodge', 'ski_resort', 'ski_slope', 'sky', 'skyscraper',
1367
+ 'slum', 'snowfield', 'squash_court', 'stable', 'stadium_baseball',
1368
+ 'stadium_football', 'stage_indoor', 'staircase', 'street',
1369
+ 'subway_interior', 'subway_station_platform', 'supermarket', 'sushi_bar',
1370
+ 'swamp', 'swimming_pool_indoor', 'swimming_pool_outdoor',
1371
+ 'synagogue_indoor', 'synagogue_outdoor', 'television_studio',
1372
+ 'temple_east_asia', 'temple_south_asia', 'tennis_court_indoor',
1373
+ 'tennis_court_outdoor', 'tent_outdoor', 'theater_indoor_procenium',
1374
+ 'theater_indoor_seats', 'thriftshop', 'throne_room', 'ticket_booth',
1375
+ 'toll_plaza', 'topiary_garden', 'tower', 'toyshop', 'track_outdoor',
1376
+ 'train_railway', 'train_station_platform', 'tree_farm', 'tree_house',
1377
+ 'trench', 'underwater_coral_reef', 'utility_room', 'valley',
1378
+ 'van_interior', 'vegetable_garden', 'veranda', 'veterinarians_office',
1379
+ 'viaduct', 'videostore', 'village', 'vineyard', 'volcano',
1380
+ 'volleyball_court_indoor', 'volleyball_court_outdoor', 'waiting_room',
1381
+ 'warehouse_indoor', 'water_tower', 'waterfall_block', 'waterfall_fan',
1382
+ 'waterfall_plunge', 'watering_hole', 'wave', 'wet_bar', 'wheat_field',
1383
+ 'wind_farm', 'windmill', 'wine_cellar_barrel_storage',
1384
+ 'wine_cellar_bottle_storage', 'wrestling_ring_indoor', 'yard',
1385
+ 'youth_hostel')
1386
+
1387
+ CALTECH101_CATEGORIES = (
1388
+ 'BACKGROUND_Google', 'Faces', 'Faces_easy', 'Leopards', 'Motorbikes',
1389
+ 'accordion', 'airplanes', 'anchor', 'ant', 'barrel', 'bass', 'beaver',
1390
+ 'binocular', 'bonsai', 'brain', 'brontosaurus', 'buddha', 'butterfly',
1391
+ 'camera', 'cannon', 'car_side', 'ceiling_fan', 'cellphone', 'chair',
1392
+ 'chandelier', 'cougar_body', 'cougar_face', 'crab', 'crayfish',
1393
+ 'crocodile', 'crocodile_head', 'cup', 'dalmatian', 'dollar_bill',
1394
+ 'dolphin', 'dragonfly', 'electric_guitar', 'elephant', 'emu', 'euphonium',
1395
+ 'ewer', 'ferry', 'flamingo', 'flamingo_head', 'garfield', 'gerenuk',
1396
+ 'gramophone', 'grand_piano', 'hawksbill', 'headphone', 'hedgehog',
1397
+ 'helicopter', 'ibis', 'inline_skate', 'joshua_tree', 'kangaroo', 'ketch',
1398
+ 'lamp', 'laptop', 'llama', 'lobster', 'lotus', 'mandolin', 'mayfly',
1399
+ 'menorah', 'metronome', 'minaret', 'nautilus', 'octopus', 'okapi',
1400
+ 'pagoda', 'panda', 'pigeon', 'pizza', 'platypus', 'pyramid', 'revolver',
1401
+ 'rhino', 'rooster', 'saxophone', 'schooner', 'scissors', 'scorpion',
1402
+ 'sea_horse', 'snoopy', 'soccer_ball', 'stapler', 'starfish', 'stegosaurus',
1403
+ 'stop_sign', 'strawberry', 'sunflower', 'tick', 'trilobite', 'umbrella',
1404
+ 'watch', 'water_lilly', 'wheelchair', 'wild_cat', 'windsor_chair',
1405
+ 'wrench', 'yin_yang')
1406
+
1407
+ FOOD101_CATEGORIES = (
1408
+ 'apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare',
1409
+ 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito',
1410
+ 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake',
1411
+ 'ceviche', 'cheesecake', 'cheese_plate', 'chicken_curry',
1412
+ 'chicken_quesadilla', 'chicken_wings', 'chocolate_cake',
1413
+ 'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich',
1414
+ 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs',
1415
+ 'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel',
1416
+ 'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries',
1417
+ 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice',
1418
+ 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad',
1419
+ 'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza',
1420
+ 'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus',
1421
+ 'ice_cream', 'lasagna', 'lobster_bisque', 'lobster_roll_sandwich',
1422
+ 'macaroni_and_cheese', 'macarons', 'miso_soup', 'mussels', 'nachos',
1423
+ 'omelette', 'onion_rings', 'oysters', 'pad_thai', 'paella', 'pancakes',
1424
+ 'panna_cotta', 'peking_duck', 'pho', 'pizza', 'pork_chop', 'poutine',
1425
+ 'prime_rib', 'pulled_pork_sandwich', 'ramen', 'ravioli', 'red_velvet_cake',
1426
+ 'risotto', 'samosa', 'sashimi', 'scallops', 'seaweed_salad',
1427
+ 'shrimp_and_grits', 'spaghetti_bolognese', 'spaghetti_carbonara',
1428
+ 'spring_rolls', 'steak', 'strawberry_shortcake', 'sushi', 'tacos',
1429
+ 'takoyaki', 'tiramisu', 'tuna_tartare', 'waffles')
1430
+
1431
+ CIFAR100_CATEGORIES_CN = (
1432
+ '苹果', '水族馆鱼', '婴儿', '熊', '河狸', '床', '蜜蜂', '甲虫', '自行车', '瓶子', '碗', '小男孩',
1433
+ '桥', '公共汽车', '蝴蝶', '骆驼', '易拉罐', '城堡', '毛毛虫', '牛', '椅子', '猩猩', '钟', '白云',
1434
+ '蟑螂', '沙发', '螃蟹', '鳄鱼', '杯子', '恐龙', '海豚', '大象', '比目鱼', '森林', '狐狸', '小女孩',
1435
+ '仓鼠', '屋子', '袋鼠', '键盘', '台灯', '割草机', '猎豹', '狮子', '蜥蜴', '龙虾', '男人', '枫树',
1436
+ '摩托车', '山', '老鼠', '蘑菇', '橡树', '橙子橘子', '兰花', '水獭', '棕榈树', '梨', '皮卡车', '松树',
1437
+ '田野', '盘子', '罂粟', '豪猪', '负鼠', '兔子', '浣熊', '鳐鱼', '公路', '火箭', '玫瑰', '大海',
1438
+ '海豹', '鲨鱼', '尖嘴小鼠', '臭鼬', '摩天大楼', '蜗牛', '蛇', '蜘蛛', '松鼠', '电车', '向日葵', '甜椒',
1439
+ '桌子', '坦克', '电话', '电视', '老虎', '拖拉机', '火车', '鳟鱼', '郁金香', '乌龟', '衣柜', '鲸鱼',
1440
+ '柳树', '狼', '女人', '蠕虫')
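
Each tuple above is wired into a dataset class through its METAINFO dict and maps predicted class indices back to readable names. A minimal sketch (illustrative only, reusing the CIFAR10 tuple defined above):

    from mmpretrain.datasets.categories import CIFAR10_CATEGORIES

    # Datasets expose the tuple as METAINFO['classes']; a predicted index
    # is turned back into a readable name by plain tuple indexing.
    METAINFO = {'classes': CIFAR10_CATEGORIES}
    pred_idx = 3
    print(METAINFO['classes'][pred_idx])  # -> 'cat'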
mmpretrain/datasets/cifar.py ADDED
@@ -0,0 +1,210 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import pickle
3
+ from typing import List, Optional
4
+
5
+ import mmengine.dist as dist
6
+ import numpy as np
7
+ from mmengine.fileio import (LocalBackend, exists, get, get_file_backend,
8
+ join_path)
9
+ from mmengine.logging import MMLogger
10
+
11
+ from mmpretrain.registry import DATASETS
12
+ from .base_dataset import BaseDataset
13
+ from .categories import CIFAR10_CATEGORIES, CIFAR100_CATEGORIES
14
+ from .utils import check_md5, download_and_extract_archive
15
+
16
+
17
+ @DATASETS.register_module()
18
+ class CIFAR10(BaseDataset):
19
+ """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
20
+
21
+ This implementation is modified from
22
+ https://github.com/pytorch/vision/blob/master/torchvision/datasets/cifar.py
23
+
24
+ Args:
25
+ data_root (str): The root directory of the CIFAR Dataset.
26
+ split (str, optional): The dataset split, supports "train" and "test".
27
+ Defaults to "train".
28
+ metainfo (dict, optional): Meta information for dataset, such as
29
+ categories information. Defaults to None.
30
+ download (bool): Whether to download the dataset if not exists.
31
+ Defaults to True.
32
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
33
+ """ # noqa: E501
34
+
35
+ base_folder = 'cifar-10-batches-py'
36
+ url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
37
+ filename = 'cifar-10-python.tar.gz'
38
+ tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
39
+ train_list = [
40
+ ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
41
+ ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
42
+ ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
43
+ ['data_batch_4', '634d18415352ddfa80567beed471001a'],
44
+ ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
45
+ ]
46
+
47
+ test_list = [
48
+ ['test_batch', '40351d587109b95175f43aff81a1287e'],
49
+ ]
50
+ meta = {
51
+ 'filename': 'batches.meta',
52
+ 'key': 'label_names',
53
+ 'md5': '5ff9c542aee3614f3951f8cda6e48888',
54
+ }
55
+ METAINFO = {'classes': CIFAR10_CATEGORIES}
56
+
57
+ def __init__(self,
58
+ data_root: str = '',
59
+ split: str = 'train',
60
+ metainfo: Optional[dict] = None,
61
+ download: bool = True,
62
+ data_prefix: str = '',
63
+ test_mode: bool = False,
64
+ **kwargs):
65
+
66
+ splits = ['train', 'test']
67
+ assert split in splits, \
68
+ f"The split must be one of {splits}, but got '{split}'"
69
+ self.split = split
70
+
71
+ # To handle the BC-breaking
72
+ if split == 'train' and test_mode:
73
+ logger = MMLogger.get_current_instance()
74
+ logger.warning('split="train" but test_mode=True. '
75
+ 'The training set will be used.')
76
+
77
+ if not data_root and not data_prefix:
78
+ raise RuntimeError('Please set ``data_root`` to '
79
+ 'specify the dataset path')
80
+
81
+ self.download = download
82
+ super().__init__(
83
+ # The CIFAR dataset doesn't need specify annotation file
84
+ ann_file='',
85
+ metainfo=metainfo,
86
+ data_root=data_root,
87
+ data_prefix=dict(root=data_prefix),
88
+ test_mode=test_mode,
89
+ **kwargs)
90
+
91
+ def load_data_list(self):
92
+ """Load images and ground truth labels."""
93
+ root = self.data_prefix['root']
94
+ backend = get_file_backend(root, enable_singleton=True)
95
+
96
+ if dist.is_main_process() and not self._check_integrity():
97
+ if not isinstance(backend, LocalBackend):
98
+ raise RuntimeError(f'The dataset on {root} is incomplete, '
99
+ f'please handle it manually.')
100
+
101
+ if self.download:
102
+ download_and_extract_archive(
103
+ self.url, root, filename=self.filename, md5=self.tgz_md5)
104
+ else:
105
+ raise RuntimeError(
106
+ f'Cannot find {self.__class__.__name__} dataset in '
107
+ f"{self.data_prefix['root']}, you can specify "
108
+ '`download=True` to download automatically.')
109
+
110
+ dist.barrier()
111
+ assert self._check_integrity(), \
112
+ 'Download failed or shared storage is unavailable. Please ' \
113
+ f'download the dataset manually through {self.url}.'
114
+
115
+ if self.split == 'train':
116
+ downloaded_list = self.train_list
117
+ else:
118
+ downloaded_list = self.test_list
119
+
120
+ imgs = []
121
+ gt_labels = []
122
+
123
+ # load the picked numpy arrays
124
+ for file_name, _ in downloaded_list:
125
+ file_path = join_path(root, self.base_folder, file_name)
126
+ entry = pickle.loads(get(file_path), encoding='latin1')
127
+ imgs.append(entry['data'])
128
+ if 'labels' in entry:
129
+ gt_labels.extend(entry['labels'])
130
+ else:
131
+ gt_labels.extend(entry['fine_labels'])
132
+
133
+ imgs = np.vstack(imgs).reshape(-1, 3, 32, 32)
134
+ imgs = imgs.transpose((0, 2, 3, 1)) # convert to HWC
135
+
136
+ if self.CLASSES is None:
137
+ # The metainfo in the file has the lowest priority, therefore
138
+ # we only need to load it if classes is not specified.
139
+ self._load_meta()
140
+
141
+ data_list = []
142
+ for img, gt_label in zip(imgs, gt_labels):
143
+ info = {'img': img, 'gt_label': int(gt_label)}
144
+ data_list.append(info)
145
+ return data_list
146
+
147
+ def _load_meta(self):
148
+ """Load categories information from metafile."""
149
+ root = self.data_prefix['root']
150
+
151
+ path = join_path(root, self.base_folder, self.meta['filename'])
152
+ md5 = self.meta.get('md5', None)
153
+ if not exists(path) or (md5 is not None and not check_md5(path, md5)):
154
+ raise RuntimeError(
155
+ 'Dataset metadata file not found or corrupted.' +
156
+ ' You can use `download=True` to download it')
157
+ data = pickle.loads(get(path), encoding='latin1')
158
+ self._metainfo.setdefault('classes', data[self.meta['key']])
159
+
160
+ def _check_integrity(self):
161
+ """Check the integrity of data files."""
162
+ root = self.data_prefix['root']
163
+
164
+ for fentry in (self.train_list + self.test_list):
165
+ filename, md5 = fentry[0], fentry[1]
166
+ fpath = join_path(root, self.base_folder, filename)
167
+ if not exists(fpath):
168
+ return False
169
+ if md5 is not None and not check_md5(fpath, md5):
170
+ return False
171
+ return True
172
+
173
+ def extra_repr(self) -> List[str]:
174
+ """The extra repr information of the dataset."""
175
+ body = [f"Prefix of data: \t{self.data_prefix['root']}"]
176
+ return body
177
+
178
+
179
+ @DATASETS.register_module()
180
+ class CIFAR100(CIFAR10):
181
+ """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
182
+
183
+ Args:
184
+ data_root (str): The root directory of the CIFAR Dataset.
185
+ split (str, optional): The dataset split, supports "train" and "test".
186
+ Defaults to "train".
187
+ metainfo (dict, optional): Meta information for dataset, such as
188
+ categories information. Defaults to None.
189
+ download (bool): Whether to download the dataset if not exists.
190
+ Defaults to True.
191
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
192
+ """
193
+
194
+ base_folder = 'cifar-100-python'
195
+ url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
196
+ filename = 'cifar-100-python.tar.gz'
197
+ tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
198
+ train_list = [
199
+ ['train', '16019d7e3df5f24257cddd939b257f8d'],
200
+ ]
201
+
202
+ test_list = [
203
+ ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
204
+ ]
205
+ meta = {
206
+ 'filename': 'meta',
207
+ 'key': 'fine_label_names',
208
+ 'md5': '7973b15100ade9c7d40fb424638fde48',
209
+ }
210
+ METAINFO = {'classes': CIFAR100_CATEGORIES}
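
As a usage sketch of the class above (the data_root path is hypothetical; with download=True the archive is fetched and verified against the md5 sums listed in train_list/test_list):

    from mmpretrain.datasets import CIFAR10

    # 'data/cifar10' is a placeholder; the archive is downloaded there on
    # first use and each batch file is md5-checked by _check_integrity().
    train_set = CIFAR10(data_root='data/cifar10', split='train')
    sample = train_set[0]
    print(sample['img'].shape, sample['gt_label'])  # (32, 32, 3) and an int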
mmpretrain/datasets/coco_caption.py ADDED
@@ -0,0 +1,42 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
+ import mmengine
6
+ from mmengine.dataset import BaseDataset
7
+ from mmengine.fileio import get_file_backend
8
+
9
+ from mmpretrain.registry import DATASETS
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class COCOCaption(BaseDataset):
14
+ """COCO Caption dataset.
15
+
16
+ Args:
17
+ data_root (str): The root directory for ``data_prefix`` and
18
+ ``ann_file``.
19
+ ann_file (str): Annotation file path.
20
+ data_prefix (dict): Prefix for data field. Defaults to
21
+ ``dict(img_path='')``.
22
+ pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
23
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
24
+ """
25
+
26
+ def load_data_list(self) -> List[dict]:
27
+ """Load data list."""
28
+ img_prefix = self.data_prefix['img_path']
29
+ annotations = mmengine.load(self.ann_file)
30
+ file_backend = get_file_backend(img_prefix)
31
+
32
+ data_list = []
33
+ for ann in annotations:
34
+ data_info = {
35
+ 'image_id': Path(ann['image']).stem.split('_')[-1],
36
+ 'img_path': file_backend.join_path(img_prefix, ann['image']),
37
+ 'gt_caption': ann['caption'],
38
+ }
39
+
40
+ data_list.append(data_info)
41
+
42
+ return data_list
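
The loader above assumes the annotation file is a JSON list of image/caption pairs. A hypothetical entry, together with the image_id derivation used in load_data_list:

    from pathlib import Path

    # Hypothetical annotation entry; field names follow the loader above.
    ann = {'image': 'val2014/COCO_val2014_000000184613.jpg',
           'caption': 'A child holding a flowered umbrella.'}
    # image_id is the last '_'-separated chunk of the file stem:
    print(Path(ann['image']).stem.split('_')[-1])  # -> '000000184613'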
mmpretrain/datasets/coco_retrieval.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import json
3
+ from collections import OrderedDict
4
+ from typing import List
5
+
6
+ from mmengine import get_file_backend
7
+
8
+ from mmpretrain.registry import DATASETS
9
+ from .base_dataset import BaseDataset
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class COCORetrieval(BaseDataset):
14
+ """COCO Retrieval dataset.
15
+
16
+ Args:
17
+ ann_file (str): Annotation file path.
18
+ test_mode (bool): Whether dataset is used for evaluation. This will
19
+ decide the annotation format in data list annotations.
20
+ Defaults to False.
21
+ data_root (str): The root directory for ``data_prefix`` and
22
+ ``ann_file``. Defaults to ''.
23
+ data_prefix (str | dict): Prefix for training data. Defaults to ''.
24
+ pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
25
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
26
+ """
27
+
28
+ def load_data_list(self) -> List[dict]:
29
+ """Load data list."""
30
+ # get file backend
31
+ img_prefix = self.data_prefix['img_path']
32
+ file_backend = get_file_backend(img_prefix)
33
+
34
+ with open(self.ann_file, 'r') as f:
+ anno_info = json.load(f)
35
+ # mapping img_id to img filename
36
+ img_dict = OrderedDict()
37
+ for idx, img in enumerate(anno_info['images']):
38
+ if img['id'] not in img_dict:
39
+ img_rel_path = img['coco_url'].rsplit('/', 2)[-2:]
40
+ img_path = file_backend.join_path(img_prefix, *img_rel_path)
41
+
42
+ # create new idx for image
43
+ img_dict[img['id']] = dict(
44
+ ori_id=img['id'],
45
+ image_id=idx, # will be used for evaluation
46
+ img_path=img_path,
47
+ text=[],
48
+ gt_text_id=[],
49
+ gt_image_id=[],
50
+ )
51
+
52
+ train_list = []
53
+ for idx, anno in enumerate(anno_info['annotations']):
54
+ anno['text'] = anno.pop('caption')
55
+ anno['ori_id'] = anno.pop('id')
56
+ anno['text_id'] = idx # will be used for evaluation
57
+ # 1. prepare train data list item
58
+ train_data = anno.copy()
59
+ train_image = img_dict[train_data['image_id']]
60
+ train_data['img_path'] = train_image['img_path']
61
+ train_data['image_ori_id'] = train_image['ori_id']
62
+ train_data['image_id'] = train_image['image_id']
63
+ train_data['is_matched'] = True
64
+ train_list.append(train_data)
65
+ # 2. prepare eval data list item based on img dict
66
+ img_dict[anno['image_id']]['gt_text_id'].append(anno['text_id'])
67
+ img_dict[anno['image_id']]['text'].append(anno['text'])
68
+ img_dict[anno['image_id']]['gt_image_id'].append(
69
+ train_image['image_id'])
70
+
71
+ self.img_size = len(img_dict)
72
+ self.text_size = len(anno_info['annotations'])
73
+
74
+ # return needed format data list
75
+ if self.test_mode:
76
+ return list(img_dict.values())
77
+ return train_list
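
For reference, a minimal (made-up) slice of the COCO-style annotation structure that load_data_list consumes. Each caption becomes one matched training item; in test mode the per-image entries with their gt_text_id/gt_image_id lists are returned instead:

    # Field names follow the loader above; the values are hypothetical.
    anno_info = {
        'images': [
            {'id': 42,
             'coco_url': 'http://images.cocodataset.org/val2014/000042.jpg'},
        ],
        'annotations': [
            {'id': 7, 'image_id': 42, 'caption': 'Two dogs on a couch.'},
        ],
    }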
mmpretrain/datasets/coco_vqa.py ADDED
@@ -0,0 +1,114 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os.path as osp
3
+ import re
4
+ from collections import Counter
5
+ from typing import List
6
+
7
+ import mmengine
8
+ from mmengine.dataset import BaseDataset
9
+
10
+ from mmpretrain.registry import DATASETS
11
+
12
+
13
+ @DATASETS.register_module()
14
+ class COCOVQA(BaseDataset):
15
+ """VQAv2 dataset.
16
+
17
+ Args:
18
+ data_root (str): The root directory for ``data_prefix``, ``ann_file``
19
+ and ``question_file``.
20
+ data_prefix (str): The directory of images.
21
+ question_file (str): Question file path.
22
+ ann_file (str, optional): Annotation file path for training and
23
+ validation. Defaults to an empty string.
24
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
25
+ """
26
+
27
+ def __init__(self,
28
+ data_root: str,
29
+ data_prefix: str,
30
+ question_file: str,
31
+ ann_file: str = '',
32
+ **kwarg):
33
+ self.question_file = question_file
34
+ super().__init__(
35
+ data_root=data_root,
36
+ data_prefix=dict(img_path=data_prefix),
37
+ ann_file=ann_file,
38
+ **kwarg,
39
+ )
40
+
41
+ def _join_prefix(self):
42
+ if not mmengine.is_abs(self.question_file) and self.question_file:
43
+ self.question_file = osp.join(self.data_root, self.question_file)
44
+
45
+ return super()._join_prefix()
46
+
47
+ def _create_image_index(self):
48
+ img_prefix = self.data_prefix['img_path']
49
+
50
+ files = mmengine.list_dir_or_file(img_prefix, list_dir=False)
51
+ image_index = {}
52
+ for file in files:
53
+ image_id = re.findall(r'\d{12}', file)
54
+ if len(image_id) > 0:
55
+ image_id = int(image_id[-1])
56
+ image_index[image_id] = mmengine.join_path(img_prefix, file)
57
+
58
+ return image_index
59
+
60
+ def load_data_list(self) -> List[dict]:
61
+ """Load data list."""
62
+ questions = mmengine.load(self.question_file)['questions']
63
+ if self.ann_file:
64
+ annotations = mmengine.load(self.ann_file)['annotations']
65
+ assert len(questions) == len(annotations)
66
+ else:
67
+ annotations = [None] * len(questions)
68
+
69
+ # The original VQAv2 annotation and question files include
70
+ # only image ids, not image file paths.
71
+ self.image_index = self._create_image_index()
72
+
73
+ data_list = []
74
+ for question, ann in zip(questions, annotations):
75
+ # question example
76
+ # {
77
+ # 'image_id': 262144,
78
+ # 'question': "Is the ball flying towards the batter?",
79
+ # 'question_id': 262144000
80
+ # }
81
+ #
82
+ # ann example
83
+ # {
84
+ # 'question_type': "what are the",
85
+ # 'answer_type': "other",
86
+ # 'answers': [
87
+ # {'answer': 'watching',
88
+ # 'answer_id': 1,
89
+ # 'answer_confidence': 'yes'},
90
+ # ...
91
+ # ],
92
+ # 'image_id': 262148,
93
+ # 'question_id': 262148000,
94
+ # 'multiple_choice_answer': 'watching',
95
+ # 'answer_type': 'other',
96
+ # }
97
+
98
+ data_info = question
99
+ data_info['img_path'] = self.image_index[question['image_id']]
100
+
101
+ if ann is not None:
102
+ assert ann['question_id'] == question['question_id']
103
+
104
+ # add answer_weight & answer_count, delete duplicate answer
105
+ answers = [item['answer'] for item in ann.pop('answers')]
106
+ count = Counter(answers)
107
+ answer_weight = [i / len(answers) for i in count.values()]
108
+ data_info['gt_answer'] = list(count.keys())
109
+ data_info['gt_answer_weight'] = answer_weight
110
+ data_info.update(ann)
111
+
112
+ data_list.append(data_info)
113
+
114
+ return data_list
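
The de-duplication step above reduces the (typically ten) human answers to unique answers with soft weights. A self-contained sketch of that computation:

    from collections import Counter

    answers = ['watching', 'watching', 'looking', 'watching', 'looking']
    count = Counter(answers)  # preserves first-seen order
    answer_weight = [n / len(answers) for n in count.values()]
    print(list(count.keys()), answer_weight)
    # -> ['watching', 'looking'] [0.6, 0.4]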
mmpretrain/datasets/cub.py ADDED
@@ -0,0 +1,142 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+ from mmengine.logging import MMLogger
6
+
7
+ from mmpretrain.registry import DATASETS
8
+ from .base_dataset import BaseDataset
9
+ from .categories import CUB_CATEGORIES
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class CUB(BaseDataset):
14
+ """The CUB-200-2011 Dataset.
15
+
16
+ Support the `CUB-200-2011 <http://www.vision.caltech.edu/visipedia/CUB-200-2011.html>`_ Dataset.
17
+ Comparing with the `CUB-200 <http://www.vision.caltech.edu/visipedia/CUB-200.html>`_ Dataset,
18
+ there are much more pictures in `CUB-200-2011`. After downloading and decompression, the dataset
19
+ directory structure is as follows.
20
+
21
+ CUB dataset directory: ::
22
+
23
+ CUB_200_2011
24
+ ├── images
25
+ │ ├── class_x
26
+ │ │ ├── xx1.jpg
27
+ │ │ ├── xx2.jpg
28
+ │ │ └── ...
29
+ │ ├── class_y
30
+ │ │ ├── yy1.jpg
31
+ │ │ ├── yy2.jpg
32
+ │ │ └── ...
33
+ │ └── ...
34
+ ├── images.txt
35
+ ├── image_class_labels.txt
36
+ ├── train_test_split.txt
37
+ └── ....
38
+
39
+ Args:
40
+ data_root (str): The root directory for CUB-200-2011 dataset.
41
+ split (str, optional): The dataset split, supports "train" and "test".
42
+ Defaults to "train".
43
+
44
+ Examples:
45
+ >>> from mmpretrain.datasets import CUB
46
+ >>> train_dataset = CUB(data_root='data/CUB_200_2011', split='train')
47
+ >>> train_dataset
48
+ Dataset CUB
49
+ Number of samples: 5994
50
+ Number of categories: 200
51
+ Root of dataset: data/CUB_200_2011
52
+ >>> test_dataset = CUB(data_root='data/CUB_200_2011', split='test')
53
+ >>> test_dataset
54
+ Dataset CUB
55
+ Number of samples: 5794
56
+ Number of categories: 200
57
+ Root of dataset: data/CUB_200_2011
58
+ """ # noqa: E501
59
+
60
+ METAINFO = {'classes': CUB_CATEGORIES}
61
+
62
+ def __init__(self,
63
+ data_root: str,
64
+ split: str = 'train',
65
+ test_mode: bool = False,
66
+ **kwargs):
67
+
68
+ splits = ['train', 'test']
69
+ assert split in splits, \
70
+ f"The split must be one of {splits}, but got '{split}'"
71
+ self.split = split
72
+
73
+ # To handle the BC-breaking
74
+ if split == 'train' and test_mode:
75
+ logger = MMLogger.get_current_instance()
76
+ logger.warning('split="train" but test_mode=True. '
77
+ 'The training set will be used.')
78
+
79
+ ann_file = 'images.txt'
80
+ data_prefix = 'images'
81
+ image_class_labels_file = 'image_class_labels.txt'
82
+ train_test_split_file = 'train_test_split.txt'
83
+
84
+ self.backend = get_file_backend(data_root, enable_singleton=True)
85
+ self.image_class_labels_file = self.backend.join_path(
86
+ data_root, image_class_labels_file)
87
+ self.train_test_split_file = self.backend.join_path(
88
+ data_root, train_test_split_file)
89
+ super(CUB, self).__init__(
90
+ ann_file=ann_file,
91
+ data_root=data_root,
92
+ data_prefix=data_prefix,
93
+ test_mode=test_mode,
94
+ **kwargs)
95
+
96
+ def _load_data_from_txt(self, filepath):
97
+ """Load data from a CUB txt file, where every line holds an index
98
+ and a data item."""
99
+ pairs = list_from_file(filepath)
100
+ data_dict = dict()
101
+ for pair in pairs:
102
+ idx, data_item = pair.split()
103
+ # all the index starts from 1 in CUB files,
104
+ # here we need to '- 1' to let them start from 0.
105
+ data_dict[int(idx) - 1] = data_item
106
+ return data_dict
107
+
108
+ def load_data_list(self):
109
+ """Load images and ground truth labels."""
110
+ sample_dict = self._load_data_from_txt(self.ann_file)
111
+
112
+ label_dict = self._load_data_from_txt(self.image_class_labels_file)
113
+
114
+ split_dict = self._load_data_from_txt(self.train_test_split_file)
115
+
116
+ assert sample_dict.keys() == label_dict.keys() == split_dict.keys(),\
117
+ f'sample_ids should be same in files {self.ann_file}, ' \
118
+ f'{self.image_class_labels_file} and {self.train_test_split_file}'
119
+
120
+ data_list = []
121
+ for sample_id in sample_dict.keys():
122
+ if split_dict[sample_id] == '1' and self.split == 'test':
123
+ # skip train samples when split='test'
124
+ continue
125
+ elif split_dict[sample_id] == '0' and self.split == 'train':
126
+ # skip test samples when split='train'
127
+ continue
128
+
129
+ img_path = self.backend.join_path(self.img_prefix,
130
+ sample_dict[sample_id])
131
+ gt_label = int(label_dict[sample_id]) - 1
132
+ info = dict(img_path=img_path, gt_label=gt_label)
133
+ data_list.append(info)
134
+
135
+ return data_list
136
+
137
+ def extra_repr(self) -> List[str]:
138
+ """The extra repr information of the dataset."""
139
+ body = [
140
+ f'Root of dataset: \t{self.data_root}',
141
+ ]
142
+ return body
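
For reference, the three CUB txt files parsed above all share the same '<1-based index> <item>' layout, which _load_data_from_txt shifts to 0-based keys. A sketch with a made-up line:

    # One line of images.txt (hypothetical content):
    line = '1 001.Black_footed_Albatross/Black_Footed_Albatross_0046.jpg'
    idx, data_item = line.split()
    print(int(idx) - 1, data_item)  # -> 0 and the relative image path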
mmpretrain/datasets/custom.py ADDED
@@ -0,0 +1,287 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
3
+
4
+ from mmengine.fileio import (BaseStorageBackend, get_file_backend,
5
+ list_from_file)
6
+ from mmengine.logging import MMLogger
7
+
8
+ from mmpretrain.registry import DATASETS
9
+ from .base_dataset import BaseDataset
10
+
11
+
12
+ def find_folders(
13
+ root: str,
14
+ backend: Optional[BaseStorageBackend] = None
15
+ ) -> Tuple[List[str], Dict[str, int]]:
16
+ """Find classes by folders under a root.
17
+
18
+ Args:
19
+ root (string): root directory of folders
20
+ backend (BaseStorageBackend | None): The file backend of the root.
21
+ If None, auto infer backend from the root path. Defaults to None.
22
+
23
+ Returns:
24
+ Tuple[List[str], Dict[str, int]]:
25
+
26
+ - folders: The name of sub folders under the root.
27
+ - folder_to_idx: The map from folder name to class idx.
28
+ """
29
+ # Pre-build file backend to prevent verbose file backend inference.
30
+ backend = backend or get_file_backend(root, enable_singleton=True)
31
+ folders = list(
32
+ backend.list_dir_or_file(
33
+ root,
34
+ list_dir=True,
35
+ list_file=False,
36
+ recursive=False,
37
+ ))
38
+ folders.sort()
39
+ folder_to_idx = {folders[i]: i for i in range(len(folders))}
40
+ return folders, folder_to_idx
41
+
42
+
43
+ def get_samples(
44
+ root: str,
45
+ folder_to_idx: Dict[str, int],
46
+ is_valid_file: Callable,
47
+ backend: Optional[BaseStorageBackend] = None,
48
+ ):
49
+ """Make dataset by walking all images under a root.
50
+
51
+ Args:
52
+ root (string): root directory of folders
53
+ folder_to_idx (dict): the map from class name to class idx
54
+ is_valid_file (Callable): A function that takes path of a file
55
+ and check if the file is a valid sample file.
56
+ backend (BaseStorageBackend | None): The file backend of the root.
57
+ If None, auto infer backend from the root path. Defaults to None.
58
+
59
+ Returns:
60
+ Tuple[list, set]:
61
+
62
+ - samples: a list of tuple where each element is (image, class_idx)
63
+ - empty_folders: The folders don't have any valid files.
64
+ """
65
+ samples = []
66
+ available_classes = set()
67
+ # Pre-build file backend to prevent verbose file backend inference.
68
+ backend = backend or get_file_backend(root, enable_singleton=True)
69
+
70
+ if folder_to_idx is not None:
71
+ for folder_name in sorted(list(folder_to_idx.keys())):
72
+ _dir = backend.join_path(root, folder_name)
73
+ files = backend.list_dir_or_file(
74
+ _dir,
75
+ list_dir=False,
76
+ list_file=True,
77
+ recursive=True,
78
+ )
79
+ for file in sorted(list(files)):
80
+ if is_valid_file(file):
81
+ path = backend.join_path(folder_name, file)
82
+ item = (path, folder_to_idx[folder_name])
83
+ samples.append(item)
84
+ available_classes.add(folder_name)
85
+ empty_folders = set(folder_to_idx.keys()) - available_classes
86
+ else:
87
+ files = backend.list_dir_or_file(
88
+ root,
89
+ list_dir=False,
90
+ list_file=True,
91
+ recursive=True,
92
+ )
93
+ samples = [file for file in sorted(list(files)) if is_valid_file(file)]
94
+ empty_folders = None
95
+
96
+ return samples, empty_folders
97
+
98
+
99
+ @DATASETS.register_module()
100
+ class CustomDataset(BaseDataset):
101
+ """A generic dataset for multiple tasks.
102
+
103
+ The dataset supports two kinds of style.
104
+
105
+ 1. Use an annotation file to specify all samples, and each line indicates a
106
+ sample:
107
+
108
+ The annotation file (for ``with_label=True``, supervised tasks): ::
109
+
110
+ folder_1/xxx.png 0
111
+ folder_1/xxy.png 1
112
+ 123.png 4
113
+ nsdf3.png 3
114
+ ...
115
+
116
+ The annotation file (for ``with_label=False``, unsupervised tasks): ::
117
+
118
+ folder_1/xxx.png
119
+ folder_1/xxy.png
120
+ 123.png
121
+ nsdf3.png
122
+ ...
123
+
124
+ Sample files: ::
125
+
126
+ data_prefix/
127
+ ├── folder_1
128
+ │ ├── xxx.png
129
+ │ ├── xxy.png
130
+ │ └── ...
131
+ ├── 123.png
132
+ ├── nsdf3.png
133
+ └── ...
134
+
135
+ Please use the argument ``metainfo`` to specify extra information for
136
+ the task, like ``{'classes': ('bird', 'cat', 'deer', 'dog', 'frog')}``.
137
+
138
+ 2. Place all samples in one folder as below:
139
+
140
+ Sample files (for ``with_label=True``, supervised tasks, we use the name
141
+ of sub-folders as the category names): ::
142
+
143
+ data_prefix/
144
+ ├── class_x
145
+ │ ├── xxx.png
146
+ │ ├── xxy.png
147
+ │ └── ...
148
+ │ └── xxz.png
149
+ └── class_y
150
+ ├── 123.png
151
+ ├── nsdf3.png
152
+ ├── ...
153
+ └── asd932_.png
154
+
155
+ Sample files (for ``with_label=False``, unsupervised tasks, we use all
156
+ sample files under the specified folder): ::
157
+
158
+ data_prefix/
159
+ ├── folder_1
160
+ │ ├── xxx.png
161
+ │ ├── xxy.png
162
+ │ └── ...
163
+ ├── 123.png
164
+ ├── nsdf3.png
165
+ └── ...
166
+
167
+ If the ``ann_file`` is specified, the dataset will be generated by the
168
+ first way, otherwise, try the second way.
169
+
170
+ Args:
171
+ data_root (str): The root directory for ``data_prefix`` and
172
+ ``ann_file``. Defaults to ''.
173
+ data_prefix (str | dict): Prefix for the data. Defaults to ''.
174
+ ann_file (str): Annotation file path. Defaults to ''.
175
+ with_label (bool): Whether the annotation file includes ground truth
176
+ labels, or use sub-folders to specify categories.
177
+ Defaults to True.
178
+ extensions (Sequence[str]): A sequence of allowed extensions. Defaults
179
+ to ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif').
180
+ metainfo (dict, optional): Meta information for dataset, such as class
181
+ information. Defaults to None.
182
+ lazy_init (bool): Whether to load annotation during instantiation.
183
+ In some cases, such as visualization, only the meta information of
184
+ the dataset is needed, which is not necessary to load annotation
185
+ file. ``BaseDataset`` can skip loading annotations to save time by
186
+ setting ``lazy_init=True``. Defaults to False.
187
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
188
+ """
189
+
190
+ def __init__(self,
191
+ data_root: str = '',
192
+ data_prefix: Union[str, dict] = '',
193
+ ann_file: str = '',
194
+ with_label=True,
195
+ extensions: Sequence[str] = ('.jpg', '.jpeg', '.png', '.ppm',
196
+ '.bmp', '.pgm', '.tif'),
197
+ metainfo: Optional[dict] = None,
198
+ lazy_init: bool = False,
199
+ **kwargs):
200
+ assert (ann_file or data_prefix or data_root), \
201
+ 'One of `ann_file`, `data_root` and `data_prefix` must '\
202
+ 'be specified.'
203
+
204
+ self.extensions = tuple(set([i.lower() for i in extensions]))
205
+ self.with_label = with_label
206
+
207
+ super().__init__(
208
+ # The base class requires string ann_file but this class doesn't
209
+ ann_file=ann_file,
210
+ metainfo=metainfo,
211
+ data_root=data_root,
212
+ data_prefix=data_prefix,
213
+ # Force to lazy_init for some modification before loading data.
214
+ lazy_init=True,
215
+ **kwargs)
216
+
217
+ # Full initialize the dataset.
218
+ if not lazy_init:
219
+ self.full_init()
220
+
221
+ def _find_samples(self):
222
+ """find samples from ``data_prefix``."""
223
+ if self.with_label:
224
+ classes, folder_to_idx = find_folders(self.img_prefix)
225
+ samples, empty_classes = get_samples(
226
+ self.img_prefix,
227
+ folder_to_idx,
228
+ is_valid_file=self.is_valid_file,
229
+ )
230
+
231
+ self.folder_to_idx = folder_to_idx
232
+
233
+ if self.CLASSES is not None:
234
+ assert len(self.CLASSES) == len(classes), \
235
+ f"The number of subfolders ({len(classes)}) doesn't " \
236
+ f'match the number of specified classes ' \
237
+ f'({len(self.CLASSES)}). Please check the data folder.'
238
+ else:
239
+ self._metainfo['classes'] = tuple(classes)
240
+ else:
241
+ samples, empty_classes = get_samples(
242
+ self.img_prefix,
243
+ None,
244
+ is_valid_file=self.is_valid_file,
245
+ )
246
+
247
+ if len(samples) == 0:
248
+ raise RuntimeError(
249
+ f'Found 0 files in subfolders of: {self.data_prefix}. '
250
+ f'Supported extensions are: {",".join(self.extensions)}')
251
+
252
+ if empty_classes:
253
+ logger = MMLogger.get_current_instance()
254
+ logger.warning(
255
+ 'Found no valid file in the folder '
256
+ f'{", ".join(empty_classes)}. '
257
+ f"Supported extensions are: {', '.join(self.extensions)}")
258
+
259
+ return samples
260
+
261
+ def load_data_list(self):
262
+ """Load image paths and gt_labels."""
263
+ if not self.ann_file:
264
+ samples = self._find_samples()
265
+ elif self.with_label:
266
+ lines = list_from_file(self.ann_file)
267
+ samples = [x.strip().rsplit(' ', 1) for x in lines]
268
+ else:
269
+ samples = list_from_file(self.ann_file)
270
+
271
+ # Pre-build file backend to prevent verbose file backend inference.
272
+ backend = get_file_backend(self.img_prefix, enable_singleton=True)
273
+ data_list = []
274
+ for sample in samples:
275
+ if self.with_label:
276
+ filename, gt_label = sample
277
+ img_path = backend.join_path(self.img_prefix, filename)
278
+ info = {'img_path': img_path, 'gt_label': int(gt_label)}
279
+ else:
280
+ img_path = backend.join_path(self.img_prefix, sample)
281
+ info = {'img_path': img_path}
282
+ data_list.append(info)
283
+ return data_list
284
+
285
+ def is_valid_file(self, filename: str) -> bool:
286
+ """Check if a file is a valid sample."""
287
+ return filename.lower().endswith(self.extensions)
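Note: a minimal usage sketch for the two annotation styles described in the ``CustomDataset`` docstring above; the paths are hypothetical placeholders: ::

    from mmpretrain.datasets import CustomDataset

    # Way 1: an annotation file whose lines look like "class_x/xxx.png 0"
    # (hypothetical layout under data/mydataset).
    with_ann = CustomDataset(
        data_root='data/mydataset',
        data_prefix='images',
        ann_file='meta/train.txt')

    # Way 2: no annotation file; the sub-folder names under `data_prefix`
    # become the classes.
    from_folders = CustomDataset(
        data_root='data/mydataset',
        data_prefix='images')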
mmpretrain/datasets/dataset_wrappers.py ADDED
@@ -0,0 +1,176 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+
4
+ import numpy as np
5
+ from mmengine.dataset import BaseDataset, force_full_init
6
+
7
+ from mmpretrain.registry import DATASETS
8
+
9
+
10
+ @DATASETS.register_module()
11
+ class KFoldDataset:
12
+ """A wrapper of dataset for K-Fold cross-validation.
13
+
14
+ K-Fold cross-validation divides all the samples into k groups of
15
+ samples, called folds, of almost equal size. We use k-1 folds for
16
+ training and the remaining fold for validation.
17
+
18
+ Args:
19
+ dataset (:obj:`mmengine.dataset.BaseDataset` | dict): The dataset to be
20
+ divided
21
+ fold (int): The fold used to do validation. Defaults to 0.
22
+ num_splits (int): The number of all folds. Defaults to 5.
23
+ test_mode (bool): Use the training dataset or validation dataset.
24
+ Defaults to False.
25
+ seed (int, optional): The seed to shuffle the dataset before splitting.
26
+ If None, the dataset will not be shuffled. Defaults to None.
27
+ """
28
+
29
+ def __init__(self,
30
+ dataset,
31
+ fold=0,
32
+ num_splits=5,
33
+ test_mode=False,
34
+ seed=None):
35
+ if isinstance(dataset, dict):
36
+ self.dataset = DATASETS.build(dataset)
37
+ # Init the dataset wrapper lazily according to the dataset setting.
38
+ lazy_init = dataset.get('lazy_init', False)
39
+ elif isinstance(dataset, BaseDataset):
40
+ self.dataset = dataset
+ lazy_init = False  # an already-built dataset can be initialized now
41
+ else:
42
+ raise TypeError(f'Unsupported dataset type {type(dataset)}.')
43
+
44
+ self._metainfo = getattr(self.dataset, 'metainfo', {})
45
+ self.fold = fold
46
+ self.num_splits = num_splits
47
+ self.test_mode = test_mode
48
+ self.seed = seed
49
+
50
+ self._fully_initialized = False
51
+ if not lazy_init:
52
+ self.full_init()
53
+
54
+ @property
55
+ def metainfo(self) -> dict:
56
+ """Get the meta information of ``self.dataset``.
57
+
58
+ Returns:
59
+ dict: Meta information of the dataset.
60
+ """
61
+ # Prevent `self._metainfo` from being modified by outside.
62
+ return copy.deepcopy(self._metainfo)
63
+
64
+ def full_init(self):
65
+ """fully initialize the dataset."""
66
+ if self._fully_initialized:
67
+ return
68
+
69
+ self.dataset.full_init()
70
+ ori_len = len(self.dataset)
71
+ indices = list(range(ori_len))
72
+ if self.seed is not None:
73
+ rng = np.random.default_rng(self.seed)
74
+ rng.shuffle(indices)
75
+
76
+ test_start = ori_len * self.fold // self.num_splits
77
+ test_end = ori_len * (self.fold + 1) // self.num_splits
78
+ if self.test_mode:
79
+ indices = indices[test_start:test_end]
80
+ else:
81
+ indices = indices[:test_start] + indices[test_end:]
82
+
83
+ self._ori_indices = indices
84
+ self.dataset = self.dataset.get_subset(indices)
85
+
86
+ self._fully_initialized = True
87
+
88
+ @force_full_init
89
+ def _get_ori_dataset_idx(self, idx: int) -> int:
90
+ """Convert global idx to local index.
91
+
92
+ Args:
93
+ idx (int): Global index of ``KFoldDataset``.
94
+
95
+ Returns:
96
+ int: The original index in the whole dataset.
97
+ """
98
+ return self._ori_indices[idx]
99
+
100
+ @force_full_init
101
+ def get_data_info(self, idx: int) -> dict:
102
+ """Get annotation by index.
103
+
104
+ Args:
105
+ idx (int): Global index of ``KFoldDataset``.
106
+
107
+ Returns:
108
+ dict: The idx-th annotation of the datasets.
109
+ """
110
+ return self.dataset.get_data_info(idx)
111
+
112
+ @force_full_init
113
+ def __len__(self):
114
+ return len(self.dataset)
115
+
116
+ @force_full_init
117
+ def __getitem__(self, idx):
118
+ return self.dataset[idx]
119
+
120
+ @force_full_init
121
+ def get_cat_ids(self, idx):
122
+ return self.dataset.get_cat_ids(idx)
123
+
124
+ @force_full_init
125
+ def get_gt_labels(self):
126
+ return self.dataset.get_gt_labels()
127
+
128
+ @property
129
+ def CLASSES(self):
130
+ """Return all categories names."""
131
+ return self._metainfo.get('classes', None)
132
+
133
+ @property
134
+ def class_to_idx(self):
135
+ """Map mapping class name to class index.
136
+
137
+ Returns:
138
+ dict: mapping from class name to class index.
139
+ """
140
+
141
+ return {cat: i for i, cat in enumerate(self.CLASSES)}
142
+
143
+ def __repr__(self):
144
+ """Print the basic information of the dataset.
145
+
146
+ Returns:
147
+ str: Formatted string.
148
+ """
149
+ head = 'Dataset ' + self.__class__.__name__
150
+ body = []
151
+ type_ = 'test' if self.test_mode else 'training'
152
+ body.append(f'Type: \t{type_}')
153
+ body.append(f'Seed: \t{self.seed}')
154
+
155
+ def ordinal(n):
156
+ # Copy from https://codegolf.stackexchange.com/a/74047
157
+ suffix = 'tsnrhtdd'[(n // 10 % 10 != 1) * (n % 10 < 4) * n % 10::4]
158
+ return f'{n}{suffix}'
159
+
160
+ body.append(
161
+ f'Fold: \t{ordinal(self.fold+1)} of {self.num_splits}-fold')
162
+ if self._fully_initialized:
163
+ body.append(f'Number of samples: \t{self.__len__()}')
164
+ else:
165
+ body.append("Haven't been initialized")
166
+
167
+ if self.CLASSES is not None:
168
+ body.append(f'Number of categories: \t{len(self.CLASSES)}')
169
+ else:
170
+ body.append('The `CLASSES` meta info is not set.')
171
+
172
+ body.append(
173
+ f'Original dataset type:\t{self.dataset.__class__.__name__}')
174
+
175
+ lines = [head] + [' ' * 4 + line for line in body]
176
+ return '\n'.join(lines)
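Note: a sketch of building one fold of a 5-fold split with the wrapper above; the inner dataset config is a hypothetical placeholder. Using the same ``seed`` for the training and validation wrappers is what makes the two subsets complementary and disjoint: ::

    from mmpretrain.registry import DATASETS

    inner = dict(type='CustomDataset', data_root='data/mydataset')  # hypothetical
    train_set = DATASETS.build(dict(
        type='KFoldDataset', dataset=inner,
        fold=0, num_splits=5, test_mode=False, seed=42))
    val_set = DATASETS.build(dict(
        type='KFoldDataset', dataset=inner,
        fold=0, num_splits=5, test_mode=True, seed=42))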
mmpretrain/datasets/dtd.py ADDED
@@ -0,0 +1,116 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ import mat4py
5
+ from mmengine import get_file_backend
6
+
7
+ from mmpretrain.registry import DATASETS
8
+ from .base_dataset import BaseDataset
9
+ from .categories import DTD_CATEGORIES
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class DTD(BaseDataset):
14
+ """The Describable Texture Dataset (DTD).
15
+
16
+ Support the `Describable Texture Dataset <https://www.robots.ox.ac.uk/~vgg/data/dtd/>`_.
17
+ After downloading and decompression, the dataset directory structure is as follows.
18
+
19
+ DTD dataset directory: ::
20
+
21
+ dtd
22
+ ├── images
23
+ │ ├── banded
24
+ │ │ ├── banded_0002.jpg
25
+ │ │ ├── banded_0004.jpg
26
+ │ │ └── ...
27
+ │ └── ...
28
+ ├── imdb
29
+ │ └── imdb.mat
30
+ ├── labels
31
+ │ ├── labels_joint_anno.txt
32
+ │ ├── test1.txt
33
+ │ ├── test2.txt
34
+ │ ├── ...
35
+ │ └── ...
36
+ └── ...
37
+
38
+ Args:
39
+ data_root (str): The root directory for Describable Texture dataset.
40
+ split (str, optional): The dataset split, supports "train",
41
+ "val", "trainval", and "test". Default to "trainval".
42
+
43
+ Examples:
44
+ >>> from mmpretrain.datasets import DTD
45
+ >>> train_dataset = DTD(data_root='data/dtd', split='trainval')
46
+ >>> train_dataset
47
+ Dataset DTD
48
+ Number of samples: 3760
49
+ Number of categories: 47
50
+ Root of dataset: data/dtd
51
+ >>> test_dataset = DTD(data_root='data/dtd', split='test')
52
+ >>> test_dataset
53
+ Dataset DTD
54
+ Number of samples: 1880
55
+ Number of categories: 47
56
+ Root of dataset: data/dtd
57
+ """ # noqa: E501
58
+
59
+ METAINFO = {'classes': DTD_CATEGORIES}
60
+
61
+ def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
62
+
63
+ splits = ['train', 'val', 'trainval', 'test']
64
+ assert split in splits, \
65
+ f"The split must be one of {splits}, but get '{split}'"
66
+ self.split = split
67
+
68
+ data_prefix = 'images'
69
+ test_mode = split == 'test'
70
+
71
+ self.backend = get_file_backend(data_root, enable_singleton=True)
72
+ ann_file = self.backend.join_path('imdb', 'imdb.mat')
73
+
74
+ super(DTD, self).__init__(
75
+ ann_file=ann_file,
76
+ data_root=data_root,
77
+ data_prefix=data_prefix,
78
+ test_mode=test_mode,
79
+ **kwargs)
80
+
81
+ def load_data_list(self):
82
+ """Load images and ground truth labels."""
83
+
84
+ data = mat4py.loadmat(self.ann_file)['images']
85
+ names = data['name']
86
+ labels = data['class']
87
+ parts = data['set']
88
+ num = len(names)
89
+ assert num == len(labels) == len(parts), 'Invalid annotation file'
90
+
91
+ if self.split == 'train':
92
+ target_set = {1}
93
+ elif self.split == 'val':
94
+ target_set = {2}
95
+ elif self.split == 'test':
96
+ target_set = {3}
97
+ else:
98
+ target_set = {1, 2}
99
+
100
+ data_list = []
101
+ for i in range(num):
102
+ if parts[i] in target_set:
103
+ img_name = names[i]
104
+ img_path = self.backend.join_path(self.img_prefix, img_name)
105
+ gt_label = labels[i] - 1
106
+ info = dict(img_path=img_path, gt_label=gt_label)
107
+ data_list.append(info)
108
+
109
+ return data_list
110
+
111
+ def extra_repr(self) -> List[str]:
112
+ """The extra repr information of the dataset."""
113
+ body = [
114
+ f'Root of dataset: \t{self.data_root}',
115
+ ]
116
+ return body
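Note: ``mat4py.loadmat`` returns plain Python containers, so the ``images`` record read in ``load_data_list`` above is a dict of parallel lists; the ``set`` field encodes the official split (1 = train, 2 = val, 3 = test) and ``class`` is 1-based, hence the ``- 1``. A sketch of the assumed shape (values are illustrative only): ::

    import mat4py

    data = mat4py.loadmat('data/dtd/imdb/imdb.mat')['images']
    # data['name']  -> ['banded/banded_0002.jpg', ...]
    # data['class'] -> [1, ...]   # 1-based class ids
    # data['set']   -> [1, ...]   # 1: train, 2: val, 3: test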
mmpretrain/datasets/fgvcaircraft.py ADDED
@@ -0,0 +1,98 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .base_dataset import BaseDataset
8
+ from .categories import FGVCAIRCRAFT_CATEGORIES
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class FGVCAircraft(BaseDataset):
13
+ """The FGVC_Aircraft Dataset.
14
+
15
+ Support the `FGVC_Aircraft Dataset <https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/>`_.
16
+ After downloading and decompression, the dataset directory structure is as follows.
17
+
18
+ FGVC_Aircraft dataset directory: ::
19
+
20
+ fgvc-aircraft-2013b
21
+ └── data
22
+ ├── images
23
+ │ ├── 1.jpg
24
+ │ ├── 2.jpg
25
+ │ └── ...
26
+ ├── images_variant_train.txt
27
+ ├── images_variant_test.txt
28
+ ├── images_variant_trainval.txt
29
+ ├── images_variant_val.txt
30
+ ├── variants.txt
31
+ └── ....
32
+
33
+ Args:
34
+ data_root (str): The root directory for FGVC_Aircraft dataset.
35
+ split (str, optional): The dataset split, supports "train",
36
+ "val", "trainval", and "test". Default to "trainval".
37
+
38
+ Examples:
39
+ >>> from mmpretrain.datasets import FGVCAircraft
40
+ >>> train_dataset = FGVCAircraft(data_root='data/fgvc-aircraft-2013b', split='trainval')
41
+ >>> train_dataset
42
+ Dataset FGVCAircraft
43
+ Number of samples: 6667
44
+ Number of categories: 100
45
+ Root of dataset: data/fgvc-aircraft-2013b
46
+ >>> test_dataset = FGVCAircraft(data_root='data/fgvc-aircraft-2013b', split='test')
47
+ >>> test_dataset
48
+ Dataset FGVCAircraft
49
+ Number of samples: 3333
50
+ Number of categories: 100
51
+ Root of dataset: data/fgvc-aircraft-2013b
52
+ """ # noqa: E501
53
+
54
+ METAINFO = {'classes': FGVCAIRCRAFT_CATEGORIES}
55
+
56
+ def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
57
+
58
+ splits = ['train', 'val', 'trainval', 'test']
59
+ assert split in splits, \
60
+ f"The split must be one of {splits}, but get '{split}'"
61
+ self.split = split
62
+
63
+ self.backend = get_file_backend(data_root, enable_singleton=True)
64
+ ann_file = self.backend.join_path('data',
65
+ f'images_variant_{split}.txt')
66
+ data_prefix = self.backend.join_path('data', 'images')
67
+ test_mode = split == 'test'
68
+
69
+ super(FGVCAircraft, self).__init__(
70
+ ann_file=ann_file,
71
+ data_root=data_root,
72
+ test_mode=test_mode,
73
+ data_prefix=data_prefix,
74
+ **kwargs)
75
+
76
+ def load_data_list(self):
77
+ """Load images and ground truth labels."""
78
+
79
+ pairs = list_from_file(self.ann_file)
80
+ data_list = []
81
+ for pair in pairs:
82
+ pair = pair.split()
83
+ img_name = pair[0]
84
+ class_name = ' '.join(pair[1:])
85
+ img_name = f'{img_name}.jpg'
86
+ img_path = self.backend.join_path(self.img_prefix, img_name)
87
+ gt_label = self.METAINFO['classes'].index(class_name)
88
+ info = dict(img_path=img_path, gt_label=gt_label)
89
+ data_list.append(info)
90
+
91
+ return data_list
92
+
93
+ def extra_repr(self) -> List[str]:
94
+ """The extra repr information of the dataset."""
95
+ body = [
96
+ f'Root of dataset: \t{self.data_root}',
97
+ ]
98
+ return body
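Note: each line of ``images_variant_*.txt`` starts with the image id, and the rest of the line is the variant name, which may itself contain spaces; that is why ``load_data_list`` above splits on whitespace and re-joins the tail. A small illustration with a hypothetical annotation line: ::

    line = '1025794 Falcon 900'        # hypothetical annotation line
    pair = line.split()
    img_name = f'{pair[0]}.jpg'        # '1025794.jpg'
    class_name = ' '.join(pair[1:])    # 'Falcon 900'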
mmpretrain/datasets/flamingo.py ADDED
@@ -0,0 +1,295 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import random
3
+ from abc import abstractmethod
4
+ from collections import Counter
5
+ from typing import List
6
+
7
+ import mmengine
8
+ import numpy as np
9
+ from mmengine.dataset import BaseDataset
10
+ from pycocotools.coco import COCO
11
+
12
+ from mmpretrain.registry import DATASETS
13
+ from .coco_vqa import COCOVQA
14
+
15
+
16
+ class FlamingoFewShotMixin:
17
+ """Flamingo fewshot eval dataset minin.
18
+
19
+ Args:
20
+ num_shots (int): Number of shots to perform evaluation.
21
+ Defaults to 0.
22
+ Note: 0 does not mean a strict zero-shot in the Flamingo setting.
23
+ It will use 2 text-only prompts without in-context images.
24
+ num_support_examples (int): Number of support examples to get the
25
+ few shots from. Defaults to 2048.
26
+ num_query_examples (int): Number of query examples to perform the
27
+ final evaluation. Defaults to 5000.
28
+ incontext_prompt_temp (str): In context prompt template for few shot
29
+ examples. Defaults to ''.
30
+ final_prompt_temp (str): Final query prompt template. Defaults to ''.
31
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
32
+ """
33
+
34
+ def __init__(self,
35
+ num_shots: int = 0,
36
+ num_support_examples: int = 2048,
37
+ num_query_examples: int = 5000,
38
+ incontext_prompt_temp: str = '',
39
+ final_prompt_temp: str = '',
40
+ **kwarg):
41
+ self.num_shots = num_shots
42
+ self.num_support_examples = num_support_examples
43
+ self.num_query_examples = num_query_examples
44
+ self.incontext_prompt_temp = incontext_prompt_temp
45
+ self.final_prompt_temp = final_prompt_temp
46
+ super().__init__(**kwarg)
47
+
48
+ def get_subset_idx(self, total_num):
49
+ random_idx = np.random.choice(
50
+ total_num,
51
+ self.num_support_examples + self.num_query_examples,
52
+ replace=False)
53
+
54
+ support_idx = random_idx[:self.num_support_examples]
55
+ query_idx = random_idx[self.num_support_examples:]
56
+ return support_idx, query_idx
57
+
58
+ @abstractmethod
59
+ def parse_basic_anno(self, anno: dict) -> dict:
60
+ """Parse basic annotation for support and query set."""
61
+ pass
62
+
63
+ @abstractmethod
64
+ def parse_fewshot_anno(self, anno: dict, support_list: List) -> dict:
65
+ """Parse fewshot related annotation for query set with support list."""
66
+ pass
67
+
68
+
69
+ @DATASETS.register_module()
70
+ class FlamingoEvalCOCOVQA(FlamingoFewShotMixin, COCOVQA):
71
+ """Flamingo few shot VQAv2 dataset.
72
+
73
+ Args:
74
+ data_root (str): The root directory for ``data_prefix`` and
75
+ ``ann_file``.
76
+ ann_file (str): Annotation file path.
77
+ question_file (str): Question file path.
78
+ num_shots (int): Number of shots to perform evaluation.
79
+ Defaults to 0.
80
+ Note: 0 does not mean a strict zero-shot in the Flamingo setting.
81
+ It will use 2 text-only prompts without in-context images.
82
+ num_support_examples (int): Number of support examples to get the
83
+ few shots from. Defaults to 2048.
84
+ num_query_examples (int): Number of query examples to perform the
85
+ final evaluation. Defaults to 5000.
86
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
87
+ """
88
+
89
+ def __init__(self,
90
+ data_root: str,
91
+ question_file: str,
92
+ ann_file: str = '',
93
+ num_shots: int = 0,
94
+ num_support_examples: int = 2048,
95
+ num_query_examples: int = 5000,
96
+ **kwarg):
97
+ super().__init__(
98
+ data_root=data_root,
99
+ question_file=question_file,
100
+ ann_file=ann_file,
101
+ num_shots=num_shots,
102
+ num_support_examples=num_support_examples,
103
+ num_query_examples=num_query_examples,
104
+ **kwarg)
105
+
106
+ def parse_basic_anno(self, ann: dict) -> dict:
107
+ """Parse basic annotation for support and query set.
108
+
109
+ Args:
110
+ ann (dict): Annotation for a single example.
111
+
112
+ Return:
113
+ dict: Parsed annotation for single example.
114
+ """
115
+ if ann is None:
116
+ return {}
117
+
118
+ answers = [a['answer'] for a in ann['answers']]
119
+ count = Counter(answers)
120
+ answer_weight = [i / len(answers) for i in count.values()]
121
+ answer_info = {
122
+ 'gt_answer': list(count.keys()),
123
+ 'gt_answer_weight': answer_weight
124
+ }
125
+ return answer_info
126
+
127
+ def parse_fewshot_anno(self, query: dict, support_list: List) -> dict:
128
+ """Parse fewshot related annotation for query set with support list.
129
+
130
+ Args:
131
+ query (dict): Annotation for a single example.
132
+ support_list (List): List of support subset to subsample few shots.
133
+
134
+ Return:
135
+ dict: Parsed annotation for single example.
136
+ """
137
+ # prepare n shots examples
138
+ shots = random.sample(support_list, self.num_shots)
139
+
140
+ # append image path for n shots
141
+ img_path = [shot['img_path'] for shot in shots]
142
+ img_path.append(query['img_path'])
143
+ query['img_path'] = img_path
144
+
145
+ query['shots'] = [
146
+ dict(
147
+ question=item['question'],
148
+ answer=item['gt_answer'][0],
149
+ ) for item in shots
150
+ ]
151
+ return query
152
+
153
+ def load_data_list(self) -> List[dict]:
154
+ """Load data list."""
155
+ questions = mmengine.load(self.question_file)['questions']
156
+ if self.ann_file:
157
+ annotations = mmengine.load(self.ann_file)['annotations']
158
+ assert len(questions) == len(annotations)
159
+ else:
160
+ annotations = [None] * len(questions)
161
+ if self.num_shots > 0:
162
+ raise ValueError('Unable to construct few-shot examples '
163
+ 'since no annotation file is provided.')
164
+
165
+ # The original VQAv2 annotation file and question file include
166
+ # only image ids but no image file paths.
167
+ self.image_index = self._create_image_index()
168
+
169
+ num_data = len(questions)
170
+ support_idx, query_idx = self.get_subset_idx(num_data)
171
+
172
+ # prepare support subset
173
+ if self.num_shots > 0:
174
+ support_list = []
175
+ for idx in support_idx:
176
+ question = questions[idx]
177
+ ann = annotations[idx]
178
+ support = {**question, **self.parse_basic_anno(ann)}
179
+ support['img_path'] = self.image_index[question['image_id']]
180
+ support_list.append(support)
181
+
182
+ # prepare query subset
183
+ data_list = []
184
+ for idx in query_idx:
185
+ question = questions[idx]
186
+ ann = annotations[idx]
187
+ data_info = {**question, **self.parse_basic_anno(ann)}
188
+ data_info['img_path'] = self.image_index[question['image_id']]
189
+ if self.num_shots > 0:
190
+ data_info = self.parse_fewshot_anno(data_info, support_list)
191
+ data_list.append(data_info)
192
+
193
+ return data_list
194
+
195
+
196
+ @DATASETS.register_module()
197
+ class FlamingoEvalCOCOCaption(FlamingoFewShotMixin, BaseDataset):
198
+ """Flamingo few shot COCO Caption dataset.
199
+
200
+ Args:
201
+ data_root (str): The root directory for ``data_prefix`` and
202
+ ``ann_file``.
203
+ ann_file (str): Annotation file path.
204
+ data_prefix (dict): Prefix for data field. Defaults to
205
+ ``dict(img_path='')``.
206
+ num_shots (int): Number of shots to perform evaluation.
207
+ Defaults to 0.
208
+ num_support_examples (int): Number of support examples to get the
209
+ few shots from. Defaults to 2048.
210
+ num_query_examples (int): Number of query examples to perform the
211
+ final evaluation. Defaults to 5000.
212
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
213
+ """
214
+
215
+ def __init__(self,
216
+ data_root: str,
217
+ ann_file: str,
218
+ num_shots: int = 0,
219
+ num_support_examples: int = 2048,
220
+ num_query_examples: int = 5000,
221
+ **kwarg):
222
+ super().__init__(
223
+ data_root=data_root,
224
+ ann_file=ann_file,
225
+ num_shots=num_shots,
226
+ num_support_examples=num_support_examples,
227
+ num_query_examples=num_query_examples,
228
+ **kwarg)
229
+
230
+ def parse_basic_anno(self, ann: dict, coco: COCO) -> dict:
231
+ """Parse basic annotation for support and query set.
232
+
233
+ Args:
234
+ anno (dict): Annotation for single example.
235
+ coco (COCO): The coco dataset.
236
+
237
+ Return:
238
+ dict: Parsed annotation for single example.
239
+ """
240
+ img_prefix = self.data_prefix['img_path']
241
+ img = coco.imgs[ann['image_id']]
242
+ data_info = dict(
243
+ img_path=mmengine.join_path(img_prefix, img['file_name']),
244
+ gt_caption=ann['caption'],
245
+ image_id=ann['image_id'],
246
+ )
247
+ return data_info
248
+
249
+ def parse_fewshot_anno(self, query: dict, support_list: List) -> dict:
250
+ """Parse fewshot related annotation for query set with support list.
251
+
252
+ Args:
253
+ query (dict): Annotation for single example.
254
+ support_list (List): List of support subset to subsample few shots.
256
+
257
+ Return:
258
+ dict: Parsed annotation for single example.
259
+ """
260
+ # prepare n shots examples
261
+ shots = random.sample(support_list, self.num_shots)
262
+
263
+ # append image path for n shots
264
+ img_path = [shot['img_path'] for shot in shots]
265
+ img_path.append(query['img_path'])
266
+ query['img_path'] = img_path
267
+
268
+ query['shots'] = [dict(caption=item['gt_caption']) for item in shots]
269
+ return query
270
+
271
+ def load_data_list(self) -> List[dict]:
272
+ """Load data list."""
273
+ with mmengine.get_local_path(self.ann_file) as ann_file:
274
+ coco = COCO(ann_file)
275
+
276
+ num_data = len(coco.anns)
277
+ support_idx, query_idx = self.get_subset_idx(num_data)
278
+ ann_ids = list(coco.anns)
279
+
280
+ # prepare support subset
281
+ if self.num_shots > 0:
282
+ support_list = []
283
+ for idx in support_idx:
284
+ support = self.parse_basic_anno(coco.anns[ann_ids[idx]], coco)
285
+ support_list.append(support)
286
+
287
+ # prepare query subset
288
+ query_list = []
289
+ for idx in query_idx:
290
+ data_info = self.parse_basic_anno(coco.anns[ann_ids[idx]], coco)
291
+ if self.num_shots > 0:
292
+ data_info = self.parse_fewshot_anno(data_info, support_list)
293
+ query_list.append(data_info)
294
+
295
+ return query_list
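Note: ``get_subset_idx`` above draws a single permutation without replacement and slices it, so the support and query subsets can never overlap. A toy check under assumed sizes: ::

    import numpy as np

    num_support, num_query, total = 4, 3, 100   # toy sizes
    idx = np.random.choice(total, num_support + num_query, replace=False)
    support_idx, query_idx = idx[:num_support], idx[num_support:]
    assert not set(support_idx) & set(query_idx)   # disjoint by construction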
mmpretrain/datasets/flowers102.py ADDED
@@ -0,0 +1,104 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ import mat4py
5
+ from mmengine import get_file_backend
6
+
7
+ from mmpretrain.registry import DATASETS
8
+ from .base_dataset import BaseDataset
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class Flowers102(BaseDataset):
13
+ """The Oxford 102 Flower Dataset.
14
+
15
+ Support the `Oxford 102 Flowers Dataset <https://www.robots.ox.ac.uk/~vgg/data/flowers/102/>`_.
16
+ After downloading and decompression, the dataset directory structure is as follows.
17
+
18
+ Flowers102 dataset directory: ::
19
+
20
+ Flowers102
21
+ ├── jpg
22
+ │ ├── image_00001.jpg
23
+ │ ├── image_00002.jpg
24
+ │ └── ...
25
+ ├── imagelabels.mat
26
+ ├── setid.mat
27
+ └── ...
28
+
29
+ Args:
30
+ data_root (str): The root directory for Oxford 102 Flowers dataset.
31
+ split (str, optional): The dataset split, supports "train",
32
+ "val", "trainval", and "test". Default to "trainval".
33
+
34
+ Examples:
35
+ >>> from mmpretrain.datasets import Flowers102
36
+ >>> train_dataset = Flowers102(data_root='data/Flowers102', split='trainval')
37
+ >>> train_dataset
38
+ Dataset Flowers102
39
+ Number of samples: 2040
40
+ Root of dataset: data/Flowers102
41
+ >>> test_dataset = Flowers102(data_root='data/Flowers102', split='test')
42
+ >>> test_dataset
43
+ Dataset Flowers102
44
+ Number of samples: 6149
45
+ Root of dataset: data/Flowers102
46
+ """ # noqa: E501
47
+
48
+ def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
49
+ splits = ['train', 'val', 'trainval', 'test']
50
+ assert split in splits, \
51
+ f"The split must be one of {splits}, but get '{split}'"
52
+ self.split = split
53
+
54
+ ann_file = 'imagelabels.mat'
55
+ data_prefix = 'jpg'
56
+ train_test_split_file = 'setid.mat'
57
+ test_mode = split == 'test'
58
+
59
+ self.backend = get_file_backend(data_root, enable_singleton=True)
60
+
61
+ self.train_test_split_file = self.backend.join_path(
62
+ data_root, train_test_split_file)
63
+
64
+ super(Flowers102, self).__init__(
65
+ ann_file=ann_file,
66
+ data_root=data_root,
67
+ data_prefix=data_prefix,
68
+ test_mode=test_mode,
69
+ **kwargs)
70
+
71
+ def load_data_list(self):
72
+ """Load images and ground truth labels."""
73
+
74
+ label_dict = mat4py.loadmat(self.ann_file)['labels']
75
+ split_list = mat4py.loadmat(self.train_test_split_file)
76
+
77
+ if self.split == 'train':
78
+ split_list = split_list['trnid']
79
+ elif self.split == 'val':
80
+ split_list = split_list['valid']
81
+ elif self.split == 'test':
82
+ split_list = split_list['tstid']
83
+ else:
84
+ train_ids = split_list['trnid']
85
+ val_ids = split_list['valid']
86
+ train_ids.extend(val_ids)
87
+ split_list = train_ids
88
+
89
+ data_list = []
90
+ for sample_id in split_list:
91
+ img_name = 'image_%05d.jpg' % (sample_id)
92
+ img_path = self.backend.join_path(self.img_prefix, img_name)
93
+ gt_label = int(label_dict[sample_id - 1]) - 1
94
+ info = dict(img_path=img_path, gt_label=gt_label)
95
+ data_list.append(info)
96
+
97
+ return data_list
98
+
99
+ def extra_repr(self) -> List[str]:
100
+ """The extra repr information of the dataset."""
101
+ body = [
102
+ f'Root of dataset: \t{self.data_root}',
103
+ ]
104
+ return body
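Note: both ``setid.mat`` and ``imagelabels.mat`` use MATLAB's 1-based indexing, which is why ``load_data_list`` above subtracts one twice: once to index the label list and once to get a 0-based class id. A sketch with an illustrative value: ::

    sample_id = 42                           # 1-based image id from setid.mat
    img_name = 'image_%05d.jpg' % sample_id  # -> 'image_00042.jpg'
    # label_dict comes from imagelabels.mat and is also 1-based:
    # gt_label = int(label_dict[sample_id - 1]) - 1   # 0-based class id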
mmpretrain/datasets/food101.py ADDED
@@ -0,0 +1,102 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .base_dataset import BaseDataset
8
+ from .categories import FOOD101_CATEGORIES
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class Food101(BaseDataset):
13
+ """The Food101 Dataset.
14
+
15
+ Support the `Food101 Dataset <https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/>`_.
16
+ After downloading and decompression, the dataset directory structure is as follows.
17
+
18
+ Food101 dataset directory: ::
19
+
20
+ food-101
21
+ ├── images
22
+ │ ├── class_x
23
+ │ │ ├── xx1.jpg
24
+ │ │ ├── xx2.jpg
25
+ │ │ └── ...
26
+ │ ├── class_y
27
+ │ │ ├── yy1.jpg
28
+ │ │ ├── yy2.jpg
29
+ │ │ └── ...
30
+ │ └── ...
31
+ ├── meta
32
+ │ ├── train.txt
33
+ │ └── test.txt
34
+ └── ....
35
+
36
+ Args:
37
+ data_root (str): The root directory for Food101 dataset.
38
+ split (str, optional): The dataset split, supports "train" and "test".
39
+ Defaults to "train".
40
+
41
+ Examples:
42
+ >>> from mmpretrain.datasets import Food101
43
+ >>> train_dataset = Food101(data_root='data/food-101', split='train')
44
+ >>> train_dataset
45
+ Dataset Food101
46
+ Number of samples: 75750
47
+ Number of categories: 101
48
+ Root of dataset: data/food-101
49
+ >>> test_dataset = Food101(data_root='data/food-101', split='test')
50
+ >>> test_dataset
51
+ Dataset Food101
52
+ Number of samples: 25250
53
+ Number of categories: 101
54
+ Root of dataset: data/food-101
55
+ """ # noqa: E501
56
+
57
+ METAINFO = {'classes': FOOD101_CATEGORIES}
58
+
59
+ def __init__(self, data_root: str, split: str = 'train', **kwargs):
60
+
61
+ splits = ['train', 'test']
62
+ assert split in splits, \
63
+ f"The split must be one of {splits}, but get '{split}'"
64
+ self.split = split
65
+
66
+ self.backend = get_file_backend(data_root, enable_singleton=True)
67
+ if split == 'train':
68
+ ann_file = self.backend.join_path('meta', 'train.txt')
69
+ else:
70
+ ann_file = self.backend.join_path('meta', 'test.txt')
71
+
72
+ test_mode = split == 'test'
73
+ data_prefix = 'images'
74
+
75
+ super(Food101, self).__init__(
76
+ ann_file=ann_file,
77
+ data_root=data_root,
78
+ test_mode=test_mode,
79
+ data_prefix=data_prefix,
80
+ **kwargs)
81
+
82
+ def load_data_list(self):
83
+ """Load images and ground truth labels."""
84
+
85
+ pairs = list_from_file(self.ann_file)
86
+ data_list = []
87
+ for pair in pairs:
88
+ class_name, img_name = pair.split('/')
89
+ img_name = f'{img_name}.jpg'
90
+ img_path = self.backend.join_path(self.img_prefix, class_name,
91
+ img_name)
92
+ gt_label = self.METAINFO['classes'].index(class_name)
93
+ info = dict(img_path=img_path, gt_label=gt_label)
94
+ data_list.append(info)
95
+ return data_list
96
+
97
+ def extra_repr(self) -> List[str]:
98
+ """The extra repr information of the dataset."""
99
+ body = [
100
+ f'Root of dataset: \t{self.data_root}',
101
+ ]
102
+ return body
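Note: lines in ``meta/train.txt`` and ``meta/test.txt`` take the form ``class_name/image_stem`` without the ``.jpg`` suffix, so a single split recovers both the label and the image path, as ``load_data_list`` above does. An illustration with a hypothetical line: ::

    pair = 'apple_pie/1005649'        # hypothetical line from meta/train.txt
    class_name, img_name = pair.split('/')
    img_path = f'images/{class_name}/{img_name}.jpg'
    # gt_label = FOOD101_CATEGORIES.index(class_name)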
mmpretrain/datasets/imagenet.py ADDED
@@ -0,0 +1,102 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Optional, Union
3
+
4
+ from mmengine.logging import MMLogger
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .categories import IMAGENET_CATEGORIES
8
+ from .custom import CustomDataset
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class ImageNet(CustomDataset):
13
+ """`ImageNet <http://www.image-net.org>`_ Dataset.
14
+
15
+ The dataset supports two kinds of annotation format. More details can be
16
+ found in :class:`CustomDataset`.
17
+
18
+ Args:
19
+ data_root (str): The root directory for ``data_prefix`` and
20
+ ``ann_file``. Defaults to ''.
21
+ data_prefix (str | dict): Prefix for training data. Defaults to ''.
22
+ ann_file (str): Annotation file path. Defaults to ''.
23
+ metainfo (dict, optional): Meta information for dataset, such as class
24
+ information. Defaults to None.
25
+ **kwargs: Other keyword arguments in :class:`CustomDataset` and
26
+ :class:`BaseDataset`.
27
+ """ # noqa: E501
28
+
29
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')
30
+ METAINFO = {'classes': IMAGENET_CATEGORIES}
31
+
32
+ def __init__(self,
33
+ data_root: str = '',
34
+ data_prefix: Union[str, dict] = '',
35
+ ann_file: str = '',
36
+ metainfo: Optional[dict] = None,
37
+ **kwargs):
38
+ kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}
39
+ super().__init__(
40
+ data_root=data_root,
41
+ data_prefix=data_prefix,
42
+ ann_file=ann_file,
43
+ metainfo=metainfo,
44
+ **kwargs)
45
+
46
+
47
+ @DATASETS.register_module()
48
+ class ImageNet21k(CustomDataset):
49
+ """ImageNet21k Dataset.
50
+
51
+ Since the ImageNet21k dataset is extremely big, containing more than
52
+ 21k classes and about 14M images, we don't provide the default
53
+ categories list. Please specify it via the ``classes`` argument.
54
+
55
+ Args:
56
+ data_root (str): The root directory for ``data_prefix`` and
57
+ ``ann_file``. Defaults to ''.
58
+ data_prefix (str | dict): Prefix for training data. Defaults to ''.
59
+ ann_file (str): Annotation file path. Defaults to ''.
60
+ metainfo (dict, optional): Meta information for dataset, such as class
61
+ information. Defaults to None.
62
+ multi_label (bool): Whether to use multi-label annotations. Not implemented yet.
63
+ Defaults to False.
64
+ **kwargs: Other keyword arguments in :class:`CustomDataset` and
65
+ :class:`BaseDataset`.
66
+ """
67
+
68
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')
69
+
70
+ def __init__(self,
71
+ data_root: str = '',
72
+ data_prefix: Union[str, dict] = '',
73
+ ann_file: str = '',
74
+ metainfo: Optional[dict] = None,
75
+ multi_label: bool = False,
76
+ **kwargs):
77
+ if multi_label:
78
+ raise NotImplementedError(
79
+ 'The `multi_label` option is not supported by now.')
80
+ self.multi_label = multi_label
81
+
82
+ logger = MMLogger.get_current_instance()
83
+
84
+ if not ann_file:
85
+ logger.warning(
86
+ 'The ImageNet21k dataset is large, and scanning the directory may '
87
+ 'take a long time. Consider specifying the `ann_file` to '
88
+ 'accelerate the initialization.')
89
+
90
+ kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}
91
+ super().__init__(
92
+ data_root=data_root,
93
+ data_prefix=data_prefix,
94
+ ann_file=ann_file,
95
+ metainfo=metainfo,
96
+ **kwargs)
97
+
98
+ if self.CLASSES is None:
99
+ logger.warning(
100
+ 'The CLASSES is not stored in the `ImageNet21k` class. '
101
+ 'Consider specifying the `classes` argument if you need to '
102
+ 'do inference on the ImageNet-21k dataset.')
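Note: because ``ImageNet21k`` ships no default category list, both warnings above can be avoided by passing an annotation file and an explicit class list; a minimal sketch with placeholder paths: ::

    from mmpretrain.datasets import ImageNet21k

    dataset = ImageNet21k(
        data_root='data/imagenet21k',     # hypothetical layout
        ann_file='meta/train.txt',        # skips the slow directory scan
        classes='meta/classes.txt')       # one class name per line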
mmpretrain/datasets/inshop.py ADDED
@@ -0,0 +1,157 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmengine import get_file_backend, list_from_file
3
+
4
+ from mmpretrain.registry import DATASETS
5
+ from .base_dataset import BaseDataset
6
+
7
+
8
+ @DATASETS.register_module()
9
+ class InShop(BaseDataset):
10
+ """InShop Dataset for Image Retrieval.
11
+
12
+ Please download the images from the homepage
13
+ 'https://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/InShopRetrieval.html'
14
+ (In-shop Clothes Retrieval Benchmark -> Img -> img.zip,
15
+ Eval/list_eval_partition.txt), and organize them in the following way: ::
16
+
17
+ In-shop Clothes Retrieval Benchmark (data_root)/
18
+ ├── Eval /
19
+ │ └── list_eval_partition.txt (ann_file)
20
+ ├── Img (img_prefix)
21
+ │ └── img/
22
+ ├── README.txt
23
+ └── .....
24
+
25
+ Args:
26
+ data_root (str): The root directory for dataset.
27
+ split (str): Choose from 'train', 'query' and 'gallery'.
28
+ Defaults to 'train'.
29
+ data_prefix (str | dict): Prefix for training data.
30
+ Defaults to 'Img'.
31
+ ann_file (str): Annotation file path, path relative to
32
+ ``data_root``. Defaults to 'Eval/list_eval_partition.txt'.
33
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
34
+
35
+ Examples:
36
+ >>> from mmpretrain.datasets import InShop
37
+ >>>
38
+ >>> # build train InShop dataset
39
+ >>> inshop_train_cfg = dict(data_root='data/inshop', split='train')
40
+ >>> inshop_train = InShop(**inshop_train_cfg)
41
+ >>> inshop_train
42
+ Dataset InShop
43
+ Number of samples: 25882
44
+ The `CLASSES` meta info is not set.
45
+ Root of dataset: data/inshop
46
+ >>>
47
+ >>> # build query InShop dataset
48
+ >>> inshop_query_cfg = dict(data_root='data/inshop', split='query')
49
+ >>> inshop_query = InShop(**inshop_query_cfg)
50
+ >>> inshop_query
51
+ Dataset InShop
52
+ Number of samples: 14218
53
+ The `CLASSES` meta info is not set.
54
+ Root of dataset: data/inshop
55
+ >>>
56
+ >>> # build gallery InShop dataset
57
+ >>> inshop_gallery_cfg = dict(data_root='data/inshop', split='gallery')
58
+ >>> inshop_gallery = InShop(**inshop_gallery_cfg)
59
+ >>> inshop_gallery
60
+ Dataset InShop
61
+ Number of samples: 12612
62
+ The `CLASSES` meta info is not set.
63
+ Root of dataset: data/inshop
64
+ """
65
+
66
+ def __init__(self,
67
+ data_root: str,
68
+ split: str = 'train',
69
+ data_prefix: str = 'Img',
70
+ ann_file: str = 'Eval/list_eval_partition.txt',
71
+ **kwargs):
72
+
73
+ assert split in ('train', 'query', 'gallery'), "'split' of `InShop`" \
74
+ f" must be one of ['train', 'query', 'gallery'], bu get '{split}'"
75
+ self.backend = get_file_backend(data_root, enable_singleton=True)
76
+ self.split = split
77
+ super().__init__(
78
+ data_root=data_root,
79
+ data_prefix=data_prefix,
80
+ ann_file=ann_file,
81
+ **kwargs)
82
+
83
+ def _process_annotations(self):
84
+ lines = list_from_file(self.ann_file)
85
+
86
+ anno_train = dict(metainfo=dict(), data_list=list())
87
+ anno_gallery = dict(metainfo=dict(), data_list=list())
88
+
89
+ # item_id to label, each item corresponds to one class label
90
+ class_num = 0
91
+ gt_label_train = {}
92
+
93
+ # item_id to label, each label corresponds to several items
94
+ gallery_num = 0
95
+ gt_label_gallery = {}
96
+
97
+ # (lines[0], lines[1]) is the image number and the field name;
98
+ # Each line format as 'image_name, item_id, evaluation_status'
99
+ for line in lines[2:]:
100
+ img_name, item_id, status = line.split()
101
+ img_path = self.backend.join_path(self.img_prefix, img_name)
102
+ if status == 'train':
103
+ if item_id not in gt_label_train:
104
+ gt_label_train[item_id] = class_num
105
+ class_num += 1
106
+ # item_id to class_id (for the training set)
107
+ anno_train['data_list'].append(
108
+ dict(img_path=img_path, gt_label=gt_label_train[item_id]))
109
+ elif status == 'gallery':
110
+ if item_id not in gt_label_gallery:
111
+ gt_label_gallery[item_id] = []
112
+ # Since there are multiple images for each item,
113
+ # record the corresponding item for each image.
114
+ gt_label_gallery[item_id].append(gallery_num)
115
+ anno_gallery['data_list'].append(
116
+ dict(img_path=img_path, sample_idx=gallery_num))
117
+ gallery_num += 1
118
+
119
+ if self.split == 'train':
120
+ anno_train['metainfo']['class_number'] = class_num
121
+ anno_train['metainfo']['sample_number'] = \
122
+ len(anno_train['data_list'])
123
+ return anno_train
124
+ elif self.split == 'gallery':
125
+ anno_gallery['metainfo']['sample_number'] = gallery_num
126
+ return anno_gallery
127
+
128
+ # Generate the labels for the query (val) set
129
+ anno_query = dict(metainfo=dict(), data_list=list())
130
+ query_num = 0
131
+ for line in lines[2:]:
132
+ img_name, item_id, status = line.split()
133
+ img_path = self.backend.join_path(self.img_prefix, img_name)
134
+ if status == 'query':
135
+ anno_query['data_list'].append(
136
+ dict(
137
+ img_path=img_path, gt_label=gt_label_gallery[item_id]))
138
+ query_num += 1
139
+
140
+ anno_query['metainfo']['sample_number'] = query_num
141
+ return anno_query
142
+
143
+ def load_data_list(self):
144
+ """load data list.
145
+
146
+ For the train set, return image and ground truth label. For the query
147
+ set, return image and ids of images in gallery. For the gallery set,
148
+ return image and its id.
149
+ """
150
+ data_info = self._process_annotations()
151
+ data_list = data_info['data_list']
152
+ return data_list
153
+
154
+ def extra_repr(self):
155
+ """The extra repr information of the dataset."""
156
+ body = [f'Root of dataset: \t{self.data_root}']
157
+ return body
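Note: after the two header lines, every line of ``list_eval_partition.txt`` carries an image name, an item id and an evaluation status, which is what drives the three branches in ``_process_annotations`` above. An illustrative (hypothetical) line: ::

    line = 'img/WOMEN/Dresses/id_00000002/02_1_front.jpg id_00000002 train'
    img_name, item_id, status = line.split()
    # status is one of 'train', 'query' or 'gallery'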
mmpretrain/datasets/mnist.py ADDED
@@ -0,0 +1,220 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import codecs
3
+ from typing import List, Optional
4
+ from urllib.parse import urljoin
5
+
6
+ import mmengine.dist as dist
7
+ import numpy as np
8
+ import torch
9
+ from mmengine.fileio import LocalBackend, exists, get_file_backend, join_path
10
+
11
+ from mmpretrain.registry import DATASETS
12
+ from .base_dataset import BaseDataset
13
+ from .categories import FASHIONMNIST_CATEGORITES, MNIST_CATEGORITES
14
+ from .utils import (download_and_extract_archive, open_maybe_compressed_file,
15
+ rm_suffix)
16
+
17
+
18
+ @DATASETS.register_module()
19
+ class MNIST(BaseDataset):
20
+ """`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
21
+
22
+ This implementation is modified from
23
+ https://github.com/pytorch/vision/blob/master/torchvision/datasets/mnist.py
24
+
25
+ Args:
26
+ data_prefix (str): Prefix for data.
27
+ test_mode (bool): ``test_mode=True`` means in test phase.
28
+ It determines whether to use the training set or the test set.
29
+ metainfo (dict, optional): Meta information for dataset, such as
30
+ categories information. Defaults to None.
31
+ data_root (str): The root directory for ``data_prefix``.
32
+ Defaults to ''.
33
+ download (bool): Whether to download the dataset if it does not exist.
34
+ Defaults to True.
35
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
36
+ """ # noqa: E501
37
+
38
+ url_prefix = 'http://yann.lecun.com/exdb/mnist/'
39
+ # train images and labels
40
+ train_list = [
41
+ ['train-images-idx3-ubyte.gz', 'f68b3c2dcbeaaa9fbdd348bbdeb94873'],
42
+ ['train-labels-idx1-ubyte.gz', 'd53e105ee54ea40749a09fcbcd1e9432'],
43
+ ]
44
+ # test images and labels
45
+ test_list = [
46
+ ['t10k-images-idx3-ubyte.gz', '9fb629c4189551a2d022fa330f9573f3'],
47
+ ['t10k-labels-idx1-ubyte.gz', 'ec29112dd5afa0611ce80d1b7f02629c'],
48
+ ]
49
+ METAINFO = {'classes': MNIST_CATEGORITES}
50
+
51
+ def __init__(self,
52
+ data_prefix: str,
53
+ test_mode: bool,
54
+ metainfo: Optional[dict] = None,
55
+ data_root: str = '',
56
+ download: bool = True,
57
+ **kwargs):
58
+ self.download = download
59
+ super().__init__(
60
+ # The MNIST dataset doesn't need specify annotation file
61
+ ann_file='',
62
+ metainfo=metainfo,
63
+ data_root=data_root,
64
+ data_prefix=dict(root=data_prefix),
65
+ test_mode=test_mode,
66
+ **kwargs)
67
+
68
+ def load_data_list(self):
69
+ """Load images and ground truth labels."""
70
+ root = self.data_prefix['root']
71
+ backend = get_file_backend(root, enable_singleton=True)
72
+
73
+ if dist.is_main_process() and not self._check_exists():
74
+ if not isinstance(backend, LocalBackend):
75
+ raise RuntimeError(f'The dataset on {root} is not complete, '
76
+ f'please handle it manually.')
77
+
78
+ if self.download:
79
+ self._download()
80
+ else:
81
+ raise RuntimeError(
82
+ f'Cannot find {self.__class__.__name__} dataset in '
83
+ f"{self.data_prefix['root']}, you can specify "
84
+ '`download=True` to download automatically.')
85
+
86
+ dist.barrier()
87
+ assert self._check_exists(), \
88
+ 'Download failed or shared storage is unavailable. Please ' \
89
+ f'download the dataset manually through {self.url_prefix}.'
90
+
91
+ if not self.test_mode:
92
+ file_list = self.train_list
93
+ else:
94
+ file_list = self.test_list
95
+
96
+ # load data from SN3 files
97
+ imgs = read_image_file(join_path(root, rm_suffix(file_list[0][0])))
98
+ gt_labels = read_label_file(
99
+ join_path(root, rm_suffix(file_list[1][0])))
100
+
101
+ data_infos = []
102
+ for img, gt_label in zip(imgs, gt_labels):
103
+ gt_label = np.array(gt_label, dtype=np.int64)
104
+ info = {'img': img.numpy(), 'gt_label': gt_label}
105
+ data_infos.append(info)
106
+ return data_infos
107
+
108
+ def _check_exists(self):
109
+ """Check the exists of data files."""
110
+ root = self.data_prefix['root']
111
+
112
+ for filename, _ in (self.train_list + self.test_list):
113
+ # get extracted filename of data
114
+ extract_filename = rm_suffix(filename)
115
+ fpath = join_path(root, extract_filename)
116
+ if not exists(fpath):
117
+ return False
118
+ return True
119
+
120
+ def _download(self):
121
+ """Download and extract data files."""
122
+ root = self.data_prefix['root']
123
+
124
+ for filename, md5 in (self.train_list + self.test_list):
125
+ url = urljoin(self.url_prefix, filename)
126
+ download_and_extract_archive(
127
+ url, download_root=root, filename=filename, md5=md5)
128
+
129
+ def extra_repr(self) -> List[str]:
130
+ """The extra repr information of the dataset."""
131
+ body = [f"Prefix of data: \t{self.data_prefix['root']}"]
132
+ return body
133
+
134
+
135
+ @DATASETS.register_module()
136
+ class FashionMNIST(MNIST):
137
+ """`Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_
138
+ Dataset.
139
+
140
+ Args:
141
+ data_prefix (str): Prefix for data.
142
+ test_mode (bool): ``test_mode=True`` means in test phase.
143
+ It determines whether to use the training set or the test set.
144
+ metainfo (dict, optional): Meta information for dataset, such as
145
+ categories information. Defaults to None.
146
+ data_root (str): The root directory for ``data_prefix``.
147
+ Defaults to ''.
148
+ download (bool): Whether to download the dataset if it does not exist.
149
+ Defaults to True.
150
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
151
+ """
152
+
153
+ url_prefix = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
154
+ # train images and labels
155
+ train_list = [
156
+ ['train-images-idx3-ubyte.gz', '8d4fb7e6c68d591d4c3dfef9ec88bf0d'],
157
+ ['train-labels-idx1-ubyte.gz', '25c81989df183df01b3e8a0aad5dffbe'],
158
+ ]
159
+ # test images and labels
160
+ test_list = [
161
+ ['t10k-images-idx3-ubyte.gz', 'bef4ecab320f06d8554ea6380940ec79'],
162
+ ['t10k-labels-idx1-ubyte.gz', 'bb300cfdad3c16e7a12a480ee83cd310'],
163
+ ]
164
+ METAINFO = {'classes': FASHIONMNIST_CATEGORITES}
165
+
166
+
167
+ def get_int(b: bytes) -> int:
168
+ """Convert bytes to int."""
169
+ return int(codecs.encode(b, 'hex'), 16)
170
+
171
+
172
+ def read_sn3_pascalvincent_tensor(path: str,
173
+ strict: bool = True) -> torch.Tensor:
174
+ """Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-
175
+ io.lsh').
176
+
177
+ Argument may be a filename, compressed filename, or file object.
178
+ """
179
+ # typemap
180
+ if not hasattr(read_sn3_pascalvincent_tensor, 'typemap'):
181
+ read_sn3_pascalvincent_tensor.typemap = {
182
+ 8: (torch.uint8, np.uint8, np.uint8),
183
+ 9: (torch.int8, np.int8, np.int8),
184
+ 11: (torch.int16, np.dtype('>i2'), 'i2'),
185
+ 12: (torch.int32, np.dtype('>i4'), 'i4'),
186
+ 13: (torch.float32, np.dtype('>f4'), 'f4'),
187
+ 14: (torch.float64, np.dtype('>f8'), 'f8')
188
+ }
189
+ # read
190
+ with open_maybe_compressed_file(path) as f:
191
+ data = f.read()
192
+ # parse
193
+ magic = get_int(data[0:4])
194
+ nd = magic % 256
195
+ ty = magic // 256
196
+ assert nd >= 1 and nd <= 3
197
+ assert ty >= 8 and ty <= 14
198
+ m = read_sn3_pascalvincent_tensor.typemap[ty]
199
+ s = [get_int(data[4 * (i + 1):4 * (i + 2)]) for i in range(nd)]
200
+ parsed = np.frombuffer(data, dtype=m[1], offset=(4 * (nd + 1)))
201
+ assert parsed.shape[0] == np.prod(s) or not strict
202
+ return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
203
+
204
+
205
+ def read_label_file(path: str) -> torch.Tensor:
206
+ """Read labels from SN3 label file."""
207
+ with open(path, 'rb') as f:
208
+ x = read_sn3_pascalvincent_tensor(f, strict=False)
209
+ assert (x.dtype == torch.uint8)
210
+ assert (x.ndimension() == 1)
211
+ return x.long()
212
+
213
+
214
+ def read_image_file(path: str) -> torch.Tensor:
215
+ """Read images from SN3 image file."""
216
+ with open(path, 'rb') as f:
217
+ x = read_sn3_pascalvincent_tensor(f, strict=False)
218
+ assert (x.dtype == torch.uint8)
219
+ assert (x.ndimension() == 3)
220
+ return x
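Note: the IDX/SN3 magic number decoded above packs the element type and the number of dimensions: the low byte is ``nd`` and the next byte is the type code (8 means uint8). A worked example for a standard MNIST image file, whose header starts with 2051: ::

    magic = 2051                # 0x00000803, IDX3 uint8 image file
    nd = magic % 256            # 3 -> (num_images, rows, cols)
    ty = magic // 256           # 8 -> uint8 elements
    assert (nd, ty) == (3, 8)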
mmpretrain/datasets/multi_label.py ADDED
@@ -0,0 +1,85 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmpretrain.registry import DATASETS
5
+ from .base_dataset import BaseDataset
6
+
7
+
8
+ @DATASETS.register_module()
9
+ class MultiLabelDataset(BaseDataset):
10
+ """Multi-label Dataset.
11
+
12
+ This dataset support annotation file in `OpenMMLab 2.0 style annotation
13
+ format`.
14
+
15
+ The annotation format is shown as follows.
16
+
17
+ .. code-block:: none
18
+
19
+ {
20
+ "metainfo":
21
+ {
22
+ "classes":['A', 'B', 'C'....]
23
+ },
24
+ "data_list":
25
+ [
26
+ {
27
+ "img_path": "test_img1.jpg",
28
+ 'gt_label': [0, 1],
29
+ },
30
+ {
31
+ "img_path": "test_img2.jpg",
32
+ 'gt_label': [2],
33
+ },
34
+ ]
35
+ ....
36
+ }
37
+
38
+
39
+ Args:
40
+ ann_file (str): Annotation file path.
41
+ metainfo (dict, optional): Meta information for dataset, such as class
42
+ information. Defaults to None.
43
+ data_root (str): The root directory for ``data_prefix`` and
44
+ ``ann_file``. Defaults to ''.
45
+ data_prefix (str | dict): Prefix for training data. Defaults to ''.
46
+ filter_cfg (dict, optional): Config for filter data. Defaults to None.
47
+ indices (int or Sequence[int], optional): Support using first few
48
+ data in annotation file to facilitate training/testing on a smaller
49
+ dataset. Defaults to None which means using all ``data_infos``.
50
+ serialize_data (bool, optional): Whether to hold memory using
51
+ serialized objects, when enabled, data loader workers can use
52
+ shared RAM from master process instead of making a copy. Defaults
53
+ to True.
54
+ pipeline (list, optional): Processing pipeline. Defaults to [].
55
+ test_mode (bool, optional): ``test_mode=True`` means in test phase.
56
+ Defaults to False.
57
+ lazy_init (bool, optional): Whether to load annotation during
58
+ instantiation. In some cases, such as visualization, only the meta
59
+ information of the dataset is needed, and it is not necessary to
60
+ load the annotation file. ``BaseDataset`` can skip loading
61
+ annotations to save time by setting ``lazy_init=True``. Defaults to False.
62
+ max_refetch (int, optional): The maximum number of extra cycles to
63
+ get a valid image if ``BaseDataset.prepare_data`` gets a None
64
+ image. Defaults to 1000.
65
+ classes (str | Sequence[str], optional): Specify names of classes.
66
+
67
+ - If is string, it should be a file path, and the every line of
68
+ the file is a name of a class.
69
+ - If is a sequence of string, every item is a name of class.
70
+ - If is None, use categories information in ``metainfo`` argument,
71
+ annotation file or the class attribute ``METAINFO``.
72
+
73
+ Defaults to None.
74
+ """
75
+
76
+ def get_cat_ids(self, idx: int) -> List[int]:
77
+ """Get category ids by index.
78
+
79
+ Args:
80
+ idx (int): Index of data.
81
+
82
+ Returns:
83
+ cat_ids (List[int]): Image categories of specified index.
84
+ """
85
+ return self.get_data_info(idx)['gt_label']
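Note: a minimal sketch of consuming the annotation format documented above; the file name is a placeholder: ::

    from mmpretrain.datasets import MultiLabelDataset

    dataset = MultiLabelDataset(ann_file='anno.json')   # hypothetical path
    cat_ids = dataset.get_cat_ids(0)   # e.g. [0, 1] for the first sample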
mmpretrain/datasets/multi_task.py ADDED
@@ -0,0 +1,337 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import copy
3
+ import os.path as osp
4
+ from os import PathLike
5
+ from typing import Optional, Sequence
6
+
7
+ import mmengine
8
+ from mmcv.transforms import Compose
9
+ from mmengine.fileio import get_file_backend
10
+
11
+ from .builder import DATASETS
12
+
13
+
14
+ def expanduser(path):
15
+ if isinstance(path, (str, PathLike)):
16
+ return osp.expanduser(path)
17
+ else:
18
+ return path
19
+
20
+
21
+ def isabs(uri):
22
+ return osp.isabs(uri) or ('://' in uri)
23
+
24
+
25
+ @DATASETS.register_module()
26
+ class MultiTaskDataset:
27
+ """Custom dataset for multi-task dataset.
28
+
29
+ To use the dataset, please generate and provide an annotation file in the
30
+ below format:
31
+
32
+ .. code-block:: json
33
+
34
+ {
35
+ "metainfo": {
36
+ "tasks":
37
+ [
38
+ "gender",
39
+ "wear"
40
+ ]
41
+ },
42
+ "data_list": [
43
+ {
44
+ "img_path": "a.jpg",
45
+ "gt_label": {
46
+ "gender": 0,
47
+ "wear": [1, 0, 1, 0]
48
+ }
49
+ },
50
+ {
51
+ "img_path": "b.jpg",
52
+ "gt_label": {
53
+ "gender": 1,
54
+ "wear": [1, 0, 1, 0]
55
+ }
56
+ }
57
+ ]
58
+ }
59
+
60
+ Assume we put our dataset in the ``data/mydataset`` folder in the
61
+ repository and organize it as the below format: ::
62
+
63
+ mmpretrain/
64
+ └── data
65
+ └── mydataset
66
+ ├── annotation
67
+ │   ├── train.json
68
+ │   ├── test.json
69
+ │   └── val.json
70
+ ├── train
71
+ │   ├── a.jpg
72
+ │   └── ...
73
+ ├── test
74
+ │   ├── b.jpg
75
+ │   └── ...
76
+ └── val
77
+ ├── c.jpg
78
+ └── ...
79
+
80
+ We can use the below config to build datasets:
81
+
82
+ .. code:: python
83
+
84
+ >>> from mmpretrain.datasets import build_dataset
85
+ >>> train_cfg = dict(
86
+ ... type="MultiTaskDataset",
87
+ ... ann_file="annotation/train.json",
88
+ ... data_root="data/mydataset",
89
+ ... # The `img_path` field in the train annotation file is relative
90
+ ... # to the `train` folder.
91
+ ... data_prefix='train',
92
+ ... )
93
+ >>> train_dataset = build_dataset(train_cfg)
94
+
95
+ Or we can put all files in the same folder: ::
96
+
97
+ mmpretrain/
98
+ └── data
99
+ └── mydataset
100
+ ├── train.json
101
+ ├── test.json
102
+ ├── val.json
103
+ ├── a.jpg
104
+ ├── b.jpg
105
+ ├── c.jpg
106
+ └── ...
107
+
108
+ And we can use the below config to build datasets:
109
+
110
+ .. code:: python
111
+
112
+ >>> from mmpretrain.datasets import build_dataset
113
+ >>> train_cfg = dict(
114
+ ... type="MultiTaskDataset",
115
+ ... ann_file="train.json",
116
+ ... data_root="data/mydataset",
117
+ ... # the `data_prefix` is not required since all paths are
118
+ ... # relative to the `data_root`.
119
+ ... )
120
+ >>> train_dataset = build_dataset(train_cfg)
121
+
122
+
123
+ Args:
124
+ ann_file (str): The annotation file path. It can be either absolute
125
+ path or relative path to the ``data_root``.
126
+ metainfo (dict, optional): The extra meta information. It should be
127
+ a dict with the same format as the ``"metainfo"`` field in the
128
+ annotation file. Defaults to None.
129
+ data_root (str, optional): The root path of the data directory. It's
130
+ the prefix of the ``data_prefix`` and the ``ann_file``. And it can
131
+ be a remote path like "s3://openmmlab/xxx/". Defaults to None.
132
+ data_prefix (str, optional): The base folder relative to the
133
+ ``data_root`` for the ``"img_path"`` field in the annotation file.
134
+ Defaults to None.
135
+ pipeline (Sequence[dict]): A list of dict, where each element
136
+ represents an operation defined in
137
+ :mod:`mmpretrain.datasets.pipelines`. Defaults to an empty tuple.
138
+ test_mode (bool): Whether the dataset is in test mode. Defaults to False.
139
+ """
140
+ METAINFO = dict()
141
+
142
+ def __init__(self,
143
+ ann_file: str,
144
+ metainfo: Optional[dict] = None,
145
+ data_root: Optional[str] = None,
146
+ data_prefix: Optional[str] = None,
147
+ pipeline: Sequence = (),
148
+ test_mode: bool = False):
149
+
150
+ self.data_root = expanduser(data_root)
151
+
152
+ # Infer the file backend
153
+ if self.data_root is not None:
154
+ self.file_backend = get_file_backend(uri=self.data_root)
155
+ else:
156
+ self.file_backend = None
157
+
158
+ self.ann_file = self._join_root(expanduser(ann_file))
159
+ self.data_prefix = self._join_root(data_prefix)
160
+
161
+ self.test_mode = test_mode
162
+ self.pipeline = Compose(pipeline)
163
+ self.data_list = self.load_data_list(self.ann_file, metainfo)
164
+
165
+ def _join_root(self, path):
166
+ """Join ``self.data_root`` with the specified path.
167
+
168
+ If the path is an absolute path, just return the path. And if the
169
+ path is None, return ``self.data_root``.
170
+
171
+ Examples:
172
+ >>> self.data_root = 'a/b/c'
173
+ >>> self._join_root('d/e/')
174
+ 'a/b/c/d/e'
175
+ >>> self._join_root('https://openmmlab.com')
176
+ 'https://openmmlab.com'
177
+ >>> self._join_root(None)
178
+ 'a/b/c'
179
+ """
180
+ if path is None:
181
+ return self.data_root
182
+ if isabs(path):
183
+ return path
184
+
185
+ joined_path = self.file_backend.join_path(self.data_root, path)
186
+ return joined_path
187
+
188
+ @classmethod
189
+ def _get_meta_info(cls, in_metainfo: dict = None) -> dict:
190
+ """Collect meta information from the dictionary of meta.
191
+
192
+ Args:
193
+ in_metainfo (dict): Meta information dict.
194
+
195
+ Returns:
196
+ dict: Parsed meta information.
197
+ """
198
+ # `cls.METAINFO` will be overwritten by in_metainfo
199
+ metainfo = copy.deepcopy(cls.METAINFO)
200
+ if in_metainfo is None:
201
+ return metainfo
202
+
203
+ metainfo.update(in_metainfo)
204
+
205
+ return metainfo
206
+
207
+ def load_data_list(self, ann_file, metainfo_override=None):
208
+ """Load annotations from an annotation file.
209
+
210
+ Args:
211
+ ann_file (str): Absolute annotation file path if ``self.data_root`` is
212
+ ``None``, or a path relative to ``self.data_root`` otherwise.
213
+
214
+ Returns:
215
+ list[dict]: A list of annotation.
216
+ """
217
+ annotations = mmengine.load(ann_file)
218
+ if not isinstance(annotations, dict):
219
+ raise TypeError(f'The annotations loaded from annotation file '
220
+ f'should be a dict, but got {type(annotations)}!')
221
+ if 'data_list' not in annotations:
222
+ raise ValueError('The annotation file must have the `data_list` '
223
+ 'field.')
224
+ metainfo = annotations.get('metainfo', {})
225
+ raw_data_list = annotations['data_list']
226
+
227
+ # Set meta information.
228
+ assert isinstance(metainfo, dict), 'The `metainfo` field in the '\
229
+ f'annotation file should be a dict, but got {type(metainfo)}'
230
+ if metainfo_override is not None:
231
+ assert isinstance(metainfo_override, dict), 'The `metainfo` ' \
232
+ f'argument should be a dict, but got {type(metainfo_override)}'
233
+ metainfo.update(metainfo_override)
234
+ self._metainfo = self._get_meta_info(metainfo)
235
+
236
+ data_list = []
237
+ for i, raw_data in enumerate(raw_data_list):
238
+ try:
239
+ data_list.append(self.parse_data_info(raw_data))
240
+ except AssertionError as e:
241
+ raise RuntimeError(
242
+ f'The format check failed while parsing item {i} of '
243
+ f'the annotation file with error: {e}')
244
+ return data_list
245
+
246
+ def parse_data_info(self, raw_data):
247
+ """Parse raw annotation to target format.
248
+
249
+ This method will return a dict which contains the data information of a
250
+ sample.
251
+
252
+ Args:
253
+ raw_data (dict): Raw data information load from ``ann_file``
254
+
255
+ Returns:
256
+ dict: Parsed annotation.
257
+ """
258
+ assert isinstance(raw_data, dict), \
259
+ f'The item should be a dict, but got {type(raw_data)}'
260
+ assert 'img_path' in raw_data, \
261
+ "The item doesn't have `img_path` field."
262
+ data = dict(
263
+ img_path=self._join_root(raw_data['img_path']),
264
+ gt_label=raw_data['gt_label'],
265
+ )
266
+ return data
267
+
268
+ @property
269
+ def metainfo(self) -> dict:
270
+ """Get meta information of dataset.
271
+
272
+ Returns:
273
+ dict: meta information collected from ``cls.METAINFO``,
274
+ annotation file and metainfo argument during instantiation.
275
+ """
276
+ return copy.deepcopy(self._metainfo)
277
+
278
+ def prepare_data(self, idx):
279
+ """Get data processed by ``self.pipeline``.
280
+
281
+ Args:
282
+ idx (int): The index of ``data_info``.
283
+
284
+ Returns:
285
+ Any: Depends on ``self.pipeline``.
286
+ """
287
+ results = copy.deepcopy(self.data_list[idx])
288
+ return self.pipeline(results)
289
+
290
+ def __len__(self):
291
+ """Get the length of the whole dataset.
292
+
293
+ Returns:
294
+ int: The length of the dataset.
295
+ """
296
+ return len(self.data_list)
297
+
298
+ def __getitem__(self, idx):
299
+ """Get the idx-th image and data information of dataset after
300
+ ``self.pipeline``.
301
+
302
+ Args:
303
+ idx (int): The index of the data.
304
+
305
+ Returns:
306
+ dict: The idx-th image and data information after
307
+ ``self.pipeline``.
308
+ """
309
+ return self.prepare_data(idx)
310
+
311
+ def __repr__(self):
312
+ """Print the basic information of the dataset.
313
+
314
+ Returns:
315
+ str: Formatted string.
316
+ """
317
+ head = 'Dataset ' + self.__class__.__name__
318
+ body = [f'Number of samples: \t{self.__len__()}']
319
+ if self.data_root is not None:
320
+ body.append(f'Root location: \t{self.data_root}')
321
+ body.append(f'Annotation file: \t{self.ann_file}')
322
+ if self.data_prefix is not None:
323
+ body.append(f'Prefix of images: \t{self.data_prefix}')
324
+ # -------------------- extra repr --------------------
325
+ tasks = self.metainfo['tasks']
326
+ body.append(f'For {len(tasks)} tasks')
327
+ for task in tasks:
328
+ body.append(f' {task} ')
329
+ # ----------------------------------------------------
330
+
331
+ if len(self.pipeline.transforms) > 0:
332
+ body.append('With transforms:')
333
+ for t in self.pipeline.transforms:
334
+ body.append(f' {t}')
335
+
336
+ lines = [head] + [' ' * 4 + line for line in body]
337
+ return '\n'.join(lines)
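A note on the annotation format documented above: such a file can be generated with ``mmengine.dump``. A minimal sketch, assuming JSON output; the task names, image paths and labels are hypothetical placeholders:

    # Minimal sketch of generating a MultiTaskDataset annotation file.
    # Task names, image paths and labels are hypothetical placeholders.
    import mmengine

    annotation = {
        'metainfo': {'tasks': ['gender', 'wear']},
        'data_list': [
            {'img_path': 'a.jpg',
             'gt_label': {'gender': 0, 'wear': [1, 0, 1, 0]}},
            {'img_path': 'b.jpg',
             'gt_label': {'gender': 1, 'wear': [1, 0, 1, 0]}},
        ],
    }
    mmengine.dump(annotation, 'data/mydataset/annotation/train.json')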
mmpretrain/datasets/nlvr2.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import json
3
+ from typing import List
4
+
5
+ from mmengine.fileio import get_file_backend, list_from_file
6
+
7
+ from mmpretrain.registry import DATASETS
8
+ from .base_dataset import BaseDataset
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class NLVR2(BaseDataset):
13
+ """COCO Caption dataset."""
14
+
15
+ def load_data_list(self) -> List[dict]:
16
+ """Load data list."""
17
+
18
+ data_list = []
19
+ img_prefix = self.data_prefix['img_path']
20
+ file_backend = get_file_backend(img_prefix)
21
+ examples = list_from_file(self.ann_file)
22
+
23
+ for example in examples:
24
+ example = json.loads(example)
25
+ prefix = example['identifier'].rsplit('-', 1)[0]
26
+ train_data = {}
27
+ train_data['text'] = example['sentence']
28
+ train_data['gt_label'] = {'True': 1, 'False': 0}[example['label']]
29
+ train_data['img_path'] = [
30
+ file_backend.join_path(img_prefix, prefix + f'-img{i}.png')
31
+ for i in range(2)
32
+ ]
33
+
34
+ data_list.append(train_data)
35
+
36
+ return data_list
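For reference, ``load_data_list`` above reads one JSON object per line. A minimal sketch of how a single line is parsed; the identifier, sentence and label values are hypothetical:

    # Parse one hypothetical NLVR2 annotation line as load_data_list does.
    import json

    line = ('{"identifier": "dev-850-0-0", '
            '"sentence": "There are two dogs.", "label": "True"}')
    example = json.loads(line)
    prefix = example['identifier'].rsplit('-', 1)[0]  # 'dev-850-0'
    img_paths = [f'{prefix}-img{i}.png' for i in range(2)]
    gt_label = {'True': 1, 'False': 0}[example['label']]
    print(img_paths, gt_label)
    # ['dev-850-0-img0.png', 'dev-850-0-img1.png'] 1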
mmpretrain/datasets/oxfordiiitpet.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .base_dataset import BaseDataset
8
+ from .categories import OxfordIIITPet_CATEGORIES
9
+
10
+
11
+ @DATASETS.register_module()
12
+ class OxfordIIITPet(BaseDataset):
13
+ """The Oxford-IIIT Pets Dataset.
14
+
15
+ Support the `Oxford-IIIT Pets <https://www.robots.ox.ac.uk/~vgg/data/pets/>`_ dataset.
16
+ After downloading and decompression, the dataset directory structure is as follows.
17
+
18
+ Oxford-IIIT_Pets dataset directory: ::
19
+
20
+ Oxford-IIIT_Pets
21
+ ├── images
22
+ │ ├── Abyssinian_1.jpg
23
+ │ ├── Abyssinian_2.jpg
24
+ │ └── ...
25
+ ├── annotations
26
+ │ ├── trainval.txt
27
+ │ ├── test.txt
28
+ │ ├── list.txt
29
+ │ └── ...
30
+ └── ....
31
+
32
+ Args:
33
+ data_root (str): The root directory for Oxford-IIIT Pets dataset.
34
+ split (str, optional): The dataset split, supports "trainval" and "test".
35
+ Default to "trainval".
36
+
37
+ Examples:
38
+ >>> from mmpretrain.datasets import OxfordIIITPet
39
+ >>> train_dataset = OxfordIIITPet(data_root='data/Oxford-IIIT_Pets', split='trainval')
40
+ >>> train_dataset
41
+ Dataset OxfordIIITPet
42
+ Number of samples: 3680
43
+ Number of categories: 37
44
+ Root of dataset: data/Oxford-IIIT_Pets
45
+ >>> test_dataset = OxfordIIITPet(data_root='data/Oxford-IIIT_Pets', split='test')
46
+ >>> test_dataset
47
+ Dataset OxfordIIITPet
48
+ Number of samples: 3669
49
+ Number of categories: 37
50
+ Root of dataset: data/Oxford-IIIT_Pets
51
+ """ # noqa: E501
52
+
53
+ METAINFO = {'classes': OxfordIIITPet_CATEGORIES}
54
+
55
+ def __init__(self, data_root: str, split: str = 'trainval', **kwargs):
56
+
57
+ splits = ['trainval', 'test']
58
+ assert split in splits, \
59
+ f"The split must be one of {splits}, but get '{split}'"
60
+ self.split = split
61
+
62
+ self.backend = get_file_backend(data_root, enable_singleton=True)
63
+ if split == 'trainval':
64
+ ann_file = self.backend.join_path('annotations', 'trainval.txt')
65
+ else:
66
+ ann_file = self.backend.join_path('annotations', 'test.txt')
67
+
68
+ data_prefix = 'images'
69
+ test_mode = split == 'test'
70
+
71
+ super(OxfordIIITPet, self).__init__(
72
+ ann_file=ann_file,
73
+ data_root=data_root,
74
+ data_prefix=data_prefix,
75
+ test_mode=test_mode,
76
+ **kwargs)
77
+
78
+ def load_data_list(self):
79
+ """Load images and ground truth labels."""
80
+
81
+ pairs = list_from_file(self.ann_file)
82
+ data_list = []
83
+ for pair in pairs:
84
+ img_name, class_id, _, _ = pair.split()
85
+ img_name = f'{img_name}.jpg'
86
+ img_path = self.backend.join_path(self.img_prefix, img_name)
87
+ gt_label = int(class_id) - 1
88
+ info = dict(img_path=img_path, gt_label=gt_label)
89
+ data_list.append(info)
90
+ return data_list
91
+
92
+ def extra_repr(self) -> List[str]:
93
+ """The extra repr information of the dataset."""
94
+ body = [
95
+ f'Root of dataset: \t{self.data_root}',
96
+ ]
97
+ return body
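Each annotation line above holds four space-separated fields, of which only the image name and the 1-based class id are used. A standalone replay of the parsing; the sample line is hypothetical:

    # Replay of the per-line parsing with a hypothetical annotation line.
    pair = 'Abyssinian_100 1 1 1'  # <image> <class-id> <species> <breed-id>
    img_name, class_id, _, _ = pair.split()
    img_path = f'images/{img_name}.jpg'
    gt_label = int(class_id) - 1   # class ids in the file are 1-based
    print(img_path, gt_label)      # images/Abyssinian_100.jpg 0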
mmpretrain/datasets/places205.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Optional, Union
3
+
4
+ from mmpretrain.registry import DATASETS
5
+ from .categories import PLACES205_CATEGORIES
6
+ from .custom import CustomDataset
7
+
8
+
9
+ @DATASETS.register_module()
10
+ class Places205(CustomDataset):
11
+ """`Places205 <http://places.csail.mit.edu/downloadData.html>`_ Dataset.
12
+
13
+ Args:
14
+ data_root (str): The root directory for ``data_prefix`` and
15
+ ``ann_file``. Defaults to ''.
16
+ data_prefix (str | dict): Prefix for training data. Defaults
17
+ to ''.
18
+ ann_file (str): Annotation file path. Defaults to ''.
19
+ metainfo (dict, optional): Meta information for dataset, such as class
20
+ information. Defaults to None.
21
+ **kwargs: Other keyword arguments in :class:`CustomDataset` and
22
+ :class:`BaseDataset`.
23
+ """
24
+
25
+ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif')
26
+ METAINFO = {'classes': PLACES205_CATEGORIES}
27
+
28
+ def __init__(self,
29
+ data_root: str = '',
30
+ data_prefix: Union[str, dict] = '',
31
+ ann_file: str = '',
32
+ metainfo: Optional[dict] = None,
33
+ **kwargs):
34
+ kwargs = {'extensions': self.IMG_EXTENSIONS, **kwargs}
35
+ super().__init__(
36
+ data_root=data_root,
37
+ data_prefix=data_prefix,
38
+ ann_file=ann_file,
39
+ metainfo=metainfo,
40
+ **kwargs)
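Since this class only pins the category list and image extensions onto ``CustomDataset``, a config dict suffices to build it. A hedged sketch; the paths and annotation file name are hypothetical, and the dataset must exist on disk:

    # Build Places205 through the DATASETS registry (hypothetical paths).
    from mmpretrain.registry import DATASETS

    dataset = DATASETS.build(
        dict(
            type='Places205',
            data_root='data/places205',  # hypothetical root directory
            data_prefix='images',
            ann_file='meta/train.txt',   # hypothetical annotation file
        ))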
mmpretrain/datasets/refcoco.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os.path as osp
3
+ from typing import List
4
+
5
+ import mmengine
6
+ import numpy as np
7
+ from mmengine.dataset import BaseDataset
8
+ from pycocotools.coco import COCO
9
+
10
+ from mmpretrain.registry import DATASETS
11
+
12
+
13
+ @DATASETS.register_module()
14
+ class RefCOCO(BaseDataset):
15
+ """RefCOCO dataset.
16
+
17
+ Args:
18
+ ann_file (str): Annotation file path.
19
+ data_root (str): The root directory for ``data_prefix`` and
20
+ ``ann_file``. Defaults to ''.
21
+ data_prefix (str): Prefix for training data.
+ split_file (str): The split file path, a pickle file that stores the
+ referring expressions together with their split tags.
+ split (str): Which split of the referring expressions to load.
+ Defaults to 'train'.
22
+ pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
23
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
24
+ """
25
+
26
+ def __init__(self,
27
+ data_root,
28
+ ann_file,
29
+ data_prefix,
30
+ split_file,
31
+ split='train',
32
+ **kwargs):
33
+ self.split_file = split_file
34
+ self.split = split
35
+
36
+ super().__init__(
37
+ data_root=data_root,
38
+ data_prefix=dict(img_path=data_prefix),
39
+ ann_file=ann_file,
40
+ **kwargs,
41
+ )
42
+
43
+ def _join_prefix(self):
44
+ if not mmengine.is_abs(self.split_file) and self.split_file:
45
+ self.split_file = osp.join(self.data_root, self.split_file)
46
+
47
+ return super()._join_prefix()
48
+
49
+ def load_data_list(self) -> List[dict]:
50
+ """Load data list."""
51
+ with mmengine.get_local_path(self.ann_file) as ann_file:
52
+ coco = COCO(ann_file)
53
+ splits = mmengine.load(self.split_file, file_format='pkl')
54
+ img_prefix = self.data_prefix['img_path']
55
+
56
+ data_list = []
57
+ join_path = mmengine.fileio.get_file_backend(img_prefix).join_path
58
+ for refer in splits:
59
+ if refer['split'] != self.split:
60
+ continue
61
+
62
+ ann = coco.anns[refer['ann_id']]
63
+ img = coco.imgs[ann['image_id']]
64
+ sentences = refer['sentences']
65
+ bbox = np.array(ann['bbox'], dtype=np.float32)
66
+ bbox[2:4] = bbox[0:2] + bbox[2:4] # XYWH -> XYXY
67
+
68
+ for sent in sentences:
69
+ data_info = {
70
+ 'img_path': join_path(img_prefix, img['file_name']),
71
+ 'image_id': ann['image_id'],
72
+ 'ann_id': ann['id'],
73
+ 'text': sent['sent'],
74
+ 'gt_bboxes': bbox[None, :],
75
+ }
76
+ data_list.append(data_info)
77
+
78
+ if len(data_list) == 0:
79
+ raise ValueError(f'No sample in split "{self.split}".')
80
+
81
+ return data_list
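COCO stores boxes as XYWH while the loader above emits XYXY; a standalone check of the in-place conversion:

    # Standalone check of the XYWH -> XYXY conversion used above.
    import numpy as np

    bbox = np.array([10., 20., 30., 40.], dtype=np.float32)  # x, y, w, h
    bbox[2:4] = bbox[0:2] + bbox[2:4]
    print(bbox)  # [10. 20. 40. 60.], i.e. x1, y1, x2, y2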
mmpretrain/datasets/samplers/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .repeat_aug import RepeatAugSampler
3
+ from .sequential import SequentialSampler
4
+
5
+ __all__ = ['RepeatAugSampler', 'SequentialSampler']
mmpretrain/datasets/samplers/repeat_aug.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import Iterator, Optional, Sized
3
+
4
+ import torch
5
+ from mmengine.dist import get_dist_info, is_main_process, sync_random_seed
6
+ from torch.utils.data import Sampler
7
+
8
+ from mmpretrain.registry import DATA_SAMPLERS
9
+
10
+
11
+ @DATA_SAMPLERS.register_module()
12
+ class RepeatAugSampler(Sampler):
13
+ """Sampler that restricts data loading to a subset of the dataset for
14
+ distributed training, with repeated augmentation. It ensures that each
15
+ augmented version of a sample is visible to a different process (GPU).
16
+ Heavily based on torch.utils.data.DistributedSampler.
17
+
18
+ This sampler was taken from
19
+ https://github.com/facebookresearch/deit/blob/0c4b8f60/samplers.py
20
+
21
+ Copyright (c) 2015-present, Facebook, Inc.
22
+
23
+ Args:
24
+ dataset (Sized): The dataset.
25
+ shuffle (bool): Whether shuffle the dataset or not. Defaults to True.
26
+ num_repeats (int): The repeat times of every sample. Defaults to 3.
27
+ seed (int, optional): Random seed used to shuffle the sampler if
28
+ :attr:`shuffle=True`. This number should be identical across all
29
+ processes in the distributed group. Defaults to None.
30
+ """
31
+
32
+ def __init__(self,
33
+ dataset: Sized,
34
+ shuffle: bool = True,
35
+ num_repeats: int = 3,
36
+ seed: Optional[int] = None):
37
+ rank, world_size = get_dist_info()
38
+ self.rank = rank
39
+ self.world_size = world_size
40
+
41
+ self.dataset = dataset
42
+ self.shuffle = shuffle
43
+ if not self.shuffle and is_main_process():
44
+ from mmengine.logging import MMLogger
45
+ logger = MMLogger.get_current_instance()
46
+ logger.warning('The RepeatAugSampler always picks a '
47
+ 'fixed part of data if `shuffle=False`.')
48
+
49
+ if seed is None:
50
+ seed = sync_random_seed()
51
+ self.seed = seed
52
+ self.epoch = 0
53
+ self.num_repeats = num_repeats
54
+
55
+ # The number of repeated samples in the rank
56
+ self.num_samples = math.ceil(
57
+ len(self.dataset) * num_repeats / world_size)
58
+ # The total number of repeated samples in all ranks.
59
+ self.total_size = self.num_samples * world_size
60
+ # The number of selected samples in the rank
61
+ self.num_selected_samples = math.ceil(len(self.dataset) / world_size)
62
+
63
+ def __iter__(self) -> Iterator[int]:
64
+ """Iterate the indices."""
65
+ # deterministically shuffle based on epoch and seed
66
+ if self.shuffle:
67
+ g = torch.Generator()
68
+ g.manual_seed(self.seed + self.epoch)
69
+ indices = torch.randperm(len(self.dataset), generator=g).tolist()
70
+ else:
71
+ indices = list(range(len(self.dataset)))
72
+
73
+ # produce repeats e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2....]
74
+ indices = [x for x in indices for _ in range(self.num_repeats)]
75
+ # add extra samples to make it evenly divisible
76
+ padding_size = self.total_size - len(indices)
77
+ indices += indices[:padding_size]
78
+ assert len(indices) == self.total_size
79
+
80
+ # subsample per rank
81
+ indices = indices[self.rank:self.total_size:self.world_size]
82
+ assert len(indices) == self.num_samples
83
+
84
+ # return up to num selected samples
85
+ return iter(indices[:self.num_selected_samples])
86
+
87
+ def __len__(self) -> int:
88
+ """The number of samples in this rank."""
89
+ return self.num_selected_samples
90
+
91
+ def set_epoch(self, epoch: int) -> None:
92
+ """Sets the epoch for this sampler.
93
+
94
+ When :attr:`shuffle=True`, this ensures all replicas use a different
95
+ random ordering for each epoch. Otherwise, the next iteration of this
96
+ sampler will yield the same ordering.
97
+
98
+ Args:
99
+ epoch (int): Epoch number.
100
+ """
101
+ self.epoch = epoch
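The index arithmetic above is easiest to see on a toy case. A sketch replaying it for 4 samples, 3 repeats and 3 ranks, with shuffling disabled for readability; no distributed launch is needed:

    # Toy replay of RepeatAugSampler's index arithmetic.
    import math

    dataset_len, num_repeats, world_size = 4, 3, 3
    num_samples = math.ceil(dataset_len * num_repeats / world_size)  # 4
    total_size = num_samples * world_size                            # 12
    num_selected = math.ceil(dataset_len / world_size)               # 2

    indices = [x for x in range(dataset_len) for _ in range(num_repeats)]
    indices += indices[:total_size - len(indices)]  # pad (no-op here)
    for rank in range(world_size):
        per_rank = indices[rank:total_size:world_size]
        print(rank, per_rank[:num_selected])
    # 0 [0, 1]
    # 1 [0, 1]
    # 2 [0, 1]
    # Every rank sees the same sample ids, but each rank draws its own
    # random augmentation, so each sample gets num_repeats augmented views.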
mmpretrain/datasets/samplers/sequential.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import Iterator
3
+
4
+ import torch
5
+ from mmengine.dataset import DefaultSampler
6
+
7
+ from mmpretrain.registry import DATA_SAMPLERS
8
+
9
+
10
+ @DATA_SAMPLERS.register_module()
11
+ class SequentialSampler(DefaultSampler):
12
+ """Sequential sampler which supports different subsample policy.
13
+
14
+ Args:
15
+ dataset (Sized): The dataset.
16
+ round_up (bool): Whether to add extra samples to make the number of
17
+ samples evenly divisible by the world size. Defaults to True.
18
+ subsample_type (str): The method to subsample data on different rank.
19
+ Supported type:
20
+
21
+ - ``'default'``: Original torch behavior. Sample the examples one
22
+ by one for each GPU in turn. For instance, 8 examples on 2 GPUs,
23
+ GPU0: [0,2,4,6], GPU1: [1,3,5,7]
24
+ - ``'sequential'``: Subsample all examples into n chunks sequentially.
25
+ For instance, 8 examples on 2 GPUs,
26
+ GPU0: [0,1,2,3], GPU1: [4,5,6,7]
27
+ """
28
+
29
+ def __init__(self, subsample_type: str = 'default', **kwargs) -> None:
30
+ super().__init__(shuffle=False, **kwargs)
31
+
32
+ if subsample_type not in ['default', 'sequential']:
33
+ raise ValueError(f'Unsupported subsample type "{subsample_type}",'
34
+ ' please choose from ["default", "sequential"]')
35
+ self.subsample_type = subsample_type
36
+
37
+ def __iter__(self) -> Iterator[int]:
38
+ """Iterate the indices."""
39
+ indices = torch.arange(len(self.dataset)).tolist()
40
+
41
+ # add extra samples to make it evenly divisible
42
+ if self.round_up:
43
+ indices = (
44
+ indices *
45
+ int(self.total_size / len(indices) + 1))[:self.total_size]
46
+
47
+ # subsample
48
+ if self.subsample_type == 'default':
49
+ indices = indices[self.rank:self.total_size:self.world_size]
50
+ elif self.subsample_type == 'sequential':
51
+ num_samples_per_rank = self.total_size // self.world_size
52
+ indices = indices[self.rank *
53
+ num_samples_per_rank:(self.rank + 1) *
54
+ num_samples_per_rank]
55
+
56
+ return iter(indices)
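The two subsample policies, replayed for 8 examples on 2 ranks without a distributed launch:

    # Replay of both subsample policies: 8 examples, 2 ranks.
    total_size, world_size = 8, 2
    indices = list(range(total_size))

    for rank in range(world_size):
        default = indices[rank:total_size:world_size]
        n = total_size // world_size
        sequential = indices[rank * n:(rank + 1) * n]
        print(rank, default, sequential)
    # 0 [0, 2, 4, 6] [0, 1, 2, 3]
    # 1 [1, 3, 5, 7] [4, 5, 6, 7]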
mmpretrain/datasets/scienceqa.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os
3
+ from typing import Callable, List, Sequence
4
+
5
+ import mmengine
6
+ from mmengine.dataset import BaseDataset
7
+ from mmengine.fileio import get_file_backend
8
+
9
+ from mmpretrain.registry import DATASETS
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class ScienceQA(BaseDataset):
14
+ """ScienceQA dataset.
15
+
16
+ This dataset is used to load the multimodal data of ScienceQA dataset.
17
+
18
+ Args:
19
+ data_root (str): The root directory for ``data_prefix`` and
20
+ ``ann_file``.
21
+ split (str): The split of dataset. Options: ``train``, ``val``,
22
+ ``test``, ``trainval``, ``minival``, and ``minitest``.
23
+ split_file (str): The split file of dataset, which contains the
24
+ ids of data samples in the split.
25
+ ann_file (str): Annotation file path.
26
+ data_prefix (dict): Prefix for data field. Defaults to
27
+ ``dict(img_path='')``.
28
+ pipeline (Sequence): Processing pipeline. Defaults to an empty tuple.
29
+ **kwargs: Other keyword arguments in :class:`BaseDataset`.
30
+ """
31
+
32
+ def __init__(self,
33
+ data_root: str,
34
+ split: str,
35
+ split_file: str,
36
+ ann_file: str,
37
+ data_prefix: dict = dict(img_path=''),
38
+ pipeline: Sequence[Callable] = (),
39
+ **kwargs):
40
+
41
+ assert split in [
42
+ 'train', 'val', 'test', 'trainval', 'minival', 'minitest'
43
+ ], f'Invalid split {split}'
44
+ self.split = split
45
+ self.split_file = os.path.join(data_root, split_file)
46
+
47
+ super().__init__(
48
+ data_root=data_root,
49
+ ann_file=ann_file,
50
+ data_prefix=data_prefix,
51
+ pipeline=pipeline,
52
+ **kwargs)
53
+
54
+ def load_data_list(self) -> List[dict]:
55
+ """Load data list."""
56
+ img_prefix = self.data_prefix['img_path']
57
+ annotations = mmengine.load(self.ann_file)
58
+ current_data_split = mmengine.load(self.split_file)[self.split] # noqa
59
+
60
+ file_backend = get_file_backend(img_prefix)
61
+
62
+ data_list = []
63
+ for data_id in current_data_split:
64
+ ann = annotations[data_id]
65
+ data_info = {
66
+ 'image_id':
67
+ data_id,
68
+ 'question':
69
+ ann['question'],
70
+ 'choices':
71
+ ann['choices'],
72
+ 'gt_answer':
73
+ ann['answer'],
74
+ 'hint':
75
+ ann['hint'],
76
+ 'image_name':
77
+ ann['image'],
78
+ 'task':
79
+ ann['task'],
80
+ 'grade':
81
+ ann['grade'],
82
+ 'subject':
83
+ ann['subject'],
84
+ 'topic':
85
+ ann['topic'],
86
+ 'category':
87
+ ann['category'],
88
+ 'skill':
89
+ ann['skill'],
90
+ 'lecture':
91
+ ann['lecture'],
92
+ 'solution':
93
+ ann['solution'],
94
+ 'split':
95
+ ann['split'],
96
+ 'img_path':
97
+ file_backend.join_path(img_prefix, data_id, ann['image'])
98
+ if ann['image'] is not None else None,
99
+ 'has_image':
100
+ True if ann['image'] is not None else False,
101
+ }
102
+ data_list.append(data_info)
103
+
104
+ return data_list
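The loader above implies a two-file layout: the split file maps split names to sample ids, and the annotation file maps each id to a record with the fields read in ``load_data_list``. An abbreviated, hypothetical illustration; the field values are made up:

    # Hypothetical, abbreviated content of the two files ScienceQA reads.
    split_file = {'train': ['1', '2'], 'val': ['3']}
    annotations = {
        '1': {
            'question': 'Which property do these objects share?',
            'choices': ['hard', 'soft'],
            'answer': 0,
            'hint': '',
            'image': 'image.png',  # None for text-only samples
            'task': 'closed choice',
            'grade': 'grade2',
            'subject': 'natural science',
            'topic': 'physics',
            'category': 'Materials',
            'skill': 'Compare properties of objects',
            'lecture': '...',
            'solution': '...',
            'split': 'train',
        },
        # ...
    }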
mmpretrain/datasets/stanfordcars.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ import mat4py
5
+ from mmengine import get_file_backend
6
+
7
+ from mmpretrain.registry import DATASETS
8
+ from .base_dataset import BaseDataset
9
+ from .categories import STANFORDCARS_CATEGORIES
10
+
11
+
12
+ @DATASETS.register_module()
13
+ class StanfordCars(BaseDataset):
14
+ """The Stanford Cars Dataset.
15
+
16
+ Support the `Stanford Cars <https://ai.stanford.edu/~jkrause/cars/car_dataset.html>`_ dataset.
17
+ The official website provides two ways to organize the dataset.
18
+ Therefore, after downloading and decompression, the dataset directory structure is as follows.
19
+
20
+ Stanford Cars dataset directory: ::
21
+
22
+ Stanford_Cars
23
+ ├── car_ims
24
+ │ ├── 00001.jpg
25
+ │ ├── 00002.jpg
26
+ │ └── ...
27
+ └── cars_annos.mat
28
+
29
+ or ::
30
+
31
+ Stanford_Cars
32
+ ├── cars_train
33
+ │ ├── 00001.jpg
34
+ │ ├── 00002.jpg
35
+ │ └── ...
36
+ ├── cars_test
37
+ │ ├── 00001.jpg
38
+ │ ├── 00002.jpg
39
+ │ └── ...
40
+ └── devkit
41
+ ├── cars_meta.mat
42
+ ├── cars_train_annos.mat
43
+ ├── cars_test_annos.mat
44
+ ├── cars_test_annos_withlabels.mat
45
+ ├── eval_train.m
46
+ └── train_perfect_preds.txt
47
+
48
+ Args:
49
+ data_root (str): The root directory for Stanford Cars dataset.
50
+ split (str, optional): The dataset split, supports "train"
51
+ and "test". Default to "train".
52
+
53
+ Examples:
54
+ >>> from mmpretrain.datasets import StanfordCars
55
+ >>> train_dataset = StanfordCars(data_root='data/Stanford_Cars', split='train')
56
+ >>> train_dataset
57
+ Dataset StanfordCars
58
+ Number of samples: 8144
59
+ Number of categories: 196
60
+ Root of dataset: data/Stanford_Cars
61
+ >>> test_dataset = StanfordCars(data_root='data/Stanford_Cars', split='test')
62
+ >>> test_dataset
63
+ Dataset StanfordCars
64
+ Number of samples: 8041
65
+ Number of categories: 196
66
+ Root of dataset: data/Stanford_Cars
67
+ """ # noqa: E501
68
+
69
+ METAINFO = {'classes': STANFORDCARS_CATEGORIES}
70
+
71
+ def __init__(self, data_root: str, split: str = 'train', **kwargs):
72
+
73
+ splits = ['train', 'test']
74
+ assert split in splits, \
75
+ f"The split must be one of {splits}, but get '{split}'"
76
+ self.split = split
77
+
78
+ test_mode = split == 'test'
79
+ self.backend = get_file_backend(data_root, enable_singleton=True)
80
+
81
+ anno_file_path = self.backend.join_path(data_root, 'cars_annos.mat')
82
+ if self.backend.exists(anno_file_path):
83
+ ann_file = 'cars_annos.mat'
84
+ data_prefix = ''
85
+ else:
86
+ if test_mode:
87
+ ann_file = self.backend.join_path(
88
+ 'devkit', 'cars_test_annos_withlabels.mat')
89
+ data_prefix = 'cars_test'
90
+ else:
91
+ ann_file = self.backend.join_path('devkit',
92
+ 'cars_train_annos.mat')
93
+ data_prefix = 'cars_train'
94
+
95
+ if not self.backend.exists(
96
+ self.backend.join_path(data_root, ann_file)):
97
+ doc_url = 'https://mmpretrain.readthedocs.io/en/latest/api/datasets.html#stanfordcars' # noqa: E501
98
+ raise RuntimeError(
99
+ 'The dataset is incorrectly organized, please '
100
+ f'refer to {doc_url} and reorganize your folders.')
101
+
102
+ super(StanfordCars, self).__init__(
103
+ ann_file=ann_file,
104
+ data_root=data_root,
105
+ data_prefix=data_prefix,
106
+ test_mode=test_mode,
107
+ **kwargs)
108
+
109
+ def load_data_list(self):
110
+ data = mat4py.loadmat(self.ann_file)['annotations']
111
+
112
+ data_list = []
113
+ if 'test' in data.keys():
114
+ # first way
115
+ img_paths, labels, test = data['relative_im_path'], data[
116
+ 'class'], data['test']
117
+ num = len(img_paths)
118
+ assert num == len(labels) == len(test), 'Invalid ann file.'
119
+ for i in range(num):
120
+ if not self.test_mode and test[i] == 1:
121
+ continue
122
+ if self.test_mode and test[i] == 0:
123
+ continue
124
+ img_path = self.backend.join_path(self.img_prefix,
125
+ img_paths[i])
126
+ gt_label = labels[i] - 1
127
+ info = dict(img_path=img_path, gt_label=gt_label)
128
+ data_list.append(info)
129
+ else:
130
+ # second way
131
+ img_names, labels = data['fname'], data['class']
132
+ num = len(img_names)
133
+ assert num == len(labels), 'Invalid ann file.'
134
+ for i in range(num):
135
+ img_path = self.backend.join_path(self.img_prefix,
136
+ img_names[i])
137
+ gt_label = labels[i] - 1
138
+ info = dict(img_path=img_path, gt_label=gt_label)
139
+ data_list.append(info)
140
+
141
+ return data_list
142
+
143
+ def extra_repr(self) -> List[str]:
144
+ """The extra repr information of the dataset."""
145
+ body = [
146
+ f'Root of dataset: \t{self.data_root}',
147
+ ]
148
+ return body
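``mat4py.loadmat`` returns plain Python lists, so the first-layout filtering above reduces to matching the per-sample ``test`` flag against the mode. A toy replay with fabricated values:

    # Toy replay of the first-layout filtering; values are fabricated.
    data = {
        'relative_im_path': ['car_ims/00001.jpg', 'car_ims/00002.jpg'],
        'class': [1, 14],
        'test': [0, 1],
    }
    test_mode = False  # keep rows whose `test` flag matches the mode
    for path, label, is_test in zip(data['relative_im_path'],
                                    data['class'], data['test']):
        if is_test != int(test_mode):
            continue
        print(dict(img_path=path, gt_label=label - 1))
    # {'img_path': 'car_ims/00001.jpg', 'gt_label': 0}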
mmpretrain/datasets/sun397.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from typing import List
3
+
4
+ from mmengine import get_file_backend, list_from_file
5
+
6
+ from mmpretrain.registry import DATASETS
7
+ from .base_dataset import BaseDataset
8
+ from .categories import SUN397_CATEGORIES
9
+
10
+ # Note that some images are not JPEG files although their names end
11
+ # with .jpg and therefore cannot be read properly. So we provide
12
+ # a list to skip these files.
13
+ INVALID = [
14
+ '/a/assembly_line/sun_ajckcfldgdrdjogj.jpg',
15
+ '/a/auto_factory/sun_apfsprenzdnzbhmt.jpg',
16
+ '/b/baggage_claim/sun_avittiqqaiibgcau.jpg',
17
+ '/b/batters_box/sun_alqlfpgtbgggezyr.jpg',
18
+ '/b/bow_window/indoor/sun_ahsholsagvlrsboa.jpg',
19
+ '/b/bow_window/indoor/sun_aioomcoujmmcxkkx.jpg',
20
+ '/b/bow_window/outdoor/sun_atgtjdpqikjmllth.jpg',
21
+ '/c/carrousel/sun_atsgphqympojgxnc.jpg',
22
+ '/c/carrousel/sun_auzitjuirwolazns.jpg',
23
+ '/c/church/outdoor/sun_boagasgfltequmal.jpg',
24
+ '/c/church/outdoor/sun_brhmnwzzbkphcvfo.jpg',
25
+ '/c/church/outdoor/sun_byjkqzybxpjnuofa.jpg',
26
+ '/c/corridor/sun_aznefxvocwpgimko.jpg',
27
+ '/d/dentists_office/sun_aaefsoauqlcsihou.jpg',
28
+ '/d/diner/indoor/sun_apswilaujhntrybg.jpg',
29
+ '/e/elevator/door/sun_aaudobqlphijkjdv.jpg',
30
+ '/f/fastfood_restaurant/sun_axeniwtesffxqedr.jpg',
31
+ '/f/fire_station/sun_bjyapttwilyyuxqm.jpg',
32
+ '/f/fountain/sun_axgmpbdyvqhtkhee.jpg',
33
+ '/h/hospital_room/sun_ahokhhxjiclpxqqa.jpg',
34
+ '/o/oast_house/sun_bqsrrygxyrutgjve.jpg',
35
+ '/r/restaurant_patio/sun_aurwypviprwycame.jpg',
36
+ '/s/ski_resort/sun_bplmntyzoiobcqhp.jpg',
37
+ '/w/wine_cellar/bottle_storage/sun_afmzwxkzmxkbamqi.jpg',
38
+ '/w/wine_cellar/bottle_storage/sun_ahyymswdjejrbhyb.jpg',
39
+ '/w/wine_cellar/bottle_storage/sun_avnttpxamufejbfe.jpg',
40
+ '/a/archive/sun_awgsrbljlsvhqjij.jpg',
41
+ '/a/art_school/sun_aabogqsjulyvmcse.jpg',
42
+ '/a/art_school/sun_apnzojafyvkariue.jpg',
43
+ '/b/ball_pit/sun_atjhwqngtoeuwhso.jpg',
44
+ '/b/bow_window/indoor/sun_asxvsqbexmmtqmht.jpg',
45
+ '/b/bow_window/indoor/sun_abeugxecxrwzmffp.jpg',
46
+ '/b/bow_window/outdoor/sun_auwcqhrtzkgihvlv.jpg',
47
+ '/b/bow_window/outdoor/sun_apnvdyecnjjmcuhi.jpg',
48
+ '/c/childs_room/sun_alggivksjwwiklmt.jpg',
49
+ '/c/control_tower/outdoor/sun_avbcxakrvpomqdgr.jpg',
50
+ '/d/diner/indoor/sun_ajmzozstvsxisvgx.jpg',
51
+ '/e/elevator/door/sun_aaqsyluqbluugqgy.jpg',
52
+ '/f/fastfood_restaurant/sun_aevchxlxoruhxgrb.jpg',
53
+ '/f/firing_range/indoor/sun_affrzvahwjorpalo.jpg',
54
+ '/f/formal_garden/sun_bjvrlaeatjufekft.jpg',
55
+ '/g/garage/indoor/sun_akbocuwclkxqlofx.jpg',
56
+ '/g/greenhouse/indoor/sun_addirvgtxfbndlwf.jpg',
57
+ '/k/kindergarden_classroom/sun_ajtpaahilrqzarri.jpg',
58
+ '/l/laundromat/sun_afrrjykuhhlwiwun.jpg',
59
+ '/m/music_studio/sun_bsntklkmwqgnjrjj.jpg',
60
+ '/t/track/outdoor/sun_aophkoiosslinihb.jpg',
61
+ '/a/archive/sun_aegmzltkiwyevpwa.jpg',
62
+ '/a/auto_factory/sun_aybymzvbxgvcrwgn.jpg',
63
+ '/b/baggage_claim/sun_atpmiqmnxjpgqsxi.jpg',
64
+ '/b/baggage_claim/sun_ajffcdpsvgqfzoxx.jpg',
65
+ '/b/bamboo_forest/sun_ausmxphosyahoyjo.jpg',
66
+ '/b/batters_box/sun_aaeheulsicxtxnbu.jpg',
67
+ '/c/carrousel/sun_arjrjcxemhttubqz.jpg',
68
+ '/c/chicken_coop/outdoor/sun_abcegmmdbizqkpgh.jpg',
69
+ '/c/control_tower/outdoor/sun_axhjfpkxdvqdfkyr.jpg',
70
+ '/d/diner/indoor/sun_apaotiublwqeowck.jpg',
71
+ '/f/fastfood_restaurant/sun_anexashcgmxdbmxq.jpg',
72
+ '/l/landing_deck/sun_aizahnjfkuurjibw.jpg',
73
+ '/n/nuclear_power_plant/outdoor/sun_aoblfvgyleweqanr.jpg',
74
+ '/w/waiting_room/sun_aicytusmthfvqcwc.jpg',
75
+ '/b/bow_window/indoor/sun_asmvdfnjlulewkpr.jpg',
76
+ '/b/bus_interior/sun_adhktvidwzmodeou.jpg',
77
+ '/c/catacomb/sun_algnawesgjzzmcqd.jpg',
78
+ '/c/church/outdoor/sun_baihxlseimcsdhdx.jpg',
79
+ '/d/diner/indoor/sun_agoyalzcawgxodbm.jpg',
80
+ '/e/elevator_shaft/sun_awaitimkinrjaybl.jpg',
81
+ '/f/fastfood_restaurant/sun_aplvzfbmtqtbsvbx.jpg',
82
+ '/g/greenhouse/indoor/sun_bkccvyfpwetwjuhk.jpg',
83
+ '/c/car_interior/backseat/sun_adexwfoqdyhowxpu.jpg',
84
+ '/c/church/outdoor/sun_blmmweiumednscuf.jpg',
85
+ '/f/fire_station/sun_bibntbsuunbsdrum.jpg',
86
+ '/g/game_room/sun_aopfaqlllpvzhrak.jpg',
87
+ '/u/underwater/coral_reef/sun_biiueajvszaxqopo.jpg',
88
+ '/a/airplane_cabin/sun_arqyikigkyfpegug.jpg',
89
+ '/b/badminton_court/indoor/sun_amppvxecgtjpfold.jpg',
90
+ '/c/carrousel/sun_anxtrtieimkpmhvk.jpg',
91
+ '/c/computer_room/sun_aebgvpgtwoqbfyvl.jpg',
92
+ '/f/fire_escape/sun_atbraxuwwlvdoolv.jpg',
93
+ '/k/kasbah/sun_abxkkoielpavsouu.jpg',
94
+ '/t/tower/sun_bccqnzcvqkiwicjt.jpg',
95
+ '/a/archive/sun_afngadshxudodkct.jpg',
96
+ '/b/bow_window/indoor/sun_awnrlipyxpgxxgxz.jpg',
97
+ '/c/control_tower/outdoor/sun_arohngcbtsvbthho.jpg',
98
+ '/f/fire_station/sun_brbskkfgghbfvgkk.jpg',
99
+ '/r/restaurant_patio/sun_amjfbqzfgxarrpec.jpg',
100
+ '/v/vineyard/sun_bdxhnbgbnolddswz.jpg',
101
+ '/b/baggage_claim/sun_axrtsmillrglugia.jpg',
102
+ '/d/diner/indoor/sun_alaqevbwpjaqqdqz.jpg',
103
+ '/l/landing_deck/sun_acodgoamhgnnbmvr.jpg',
104
+ '/c/carrousel/sun_adsafgyrinnekycc.jpg',
105
+ '/c/church/outdoor/sun_bzqhuwshtdgakkay.jpg',
106
+ '/c/closet/sun_absahzamlrylkxyn.jpg',
107
+ '/f/fire_escape/sun_acdthenaosuqcoqn.jpg',
108
+ '/b/butchers_shop/sun_asrdgbefoszenfex.jpg',
109
+ '/c/church/outdoor/sun_bzfyucfrdigaqneg.jpg',
110
+ '/c/church/outdoor/sun_byzxhknqrejdajxi.jpg',
111
+ '/c/cockpit/sun_ajkulpqauavrmxae.jpg',
112
+ '/l/living_room/sun_aefoqbeatyufobtx.jpg',
113
+ '/s/supermarket/sun_attvxbzocurnddbz.jpg',
114
+ '/c/closet/sun_aqnutmwfkypmrnfy.jpg',
115
+ '/f/fire_station/sun_bttrtzktpbymxkmf.jpg',
116
+ '/s/shopping_mall/indoor/sun_avwzjsijaxnwuzjx.jpg',
117
+ '/w/windmill/sun_blvczkyqbmabzeej.jpg',
118
+ '/c/chicken_coop/outdoor/sun_amaonsnnkskxwmrj.jpg',
119
+ '/s/swimming_pool/outdoor/sun_bslaihiqlhfewtzn.jpg',
120
+ '/u/underwater/coral_reef/sun_bhcrnmvbgnkvcvkr.jpg',
121
+ '/d/dining_room/sun_azlxdhiajwrhaivq.jpg',
122
+ '/c/church/outdoor/sun_bnunxbznqnvgeykx.jpg',
123
+ '/c/corridor/sun_aspwpqqlcwzfanvl.jpg',
124
+ '/r/restaurant_patio/sun_awcbpizjbudjvrhs.jpg',
125
+ '/b/ball_pit/sun_avdnmemjrgrbkwjm.jpg',
126
+ ]
127
+
128
+
129
+ @DATASETS.register_module()
130
+ class SUN397(BaseDataset):
131
+ """The SUN397 Dataset.
132
+
133
+ Support the `SUN397 <https://vision.princeton.edu/projects/2010/SUN/>`_ dataset.
134
+ After downloading and decompression, the dataset directory structure is as follows.
135
+
136
+ SUN397 dataset directory: ::
137
+
138
+ SUN397
139
+ ├── SUN397
140
+ │ ├── a
141
+ │ │ ├── abbey
142
+ │ | | ├── sun_aaalbzqrimafwbiv.jpg
143
+ │ | | └── ...
144
+ │ │ ├── airplane_cabin
145
+ │ | | ├── sun_aadqdkqaslqqoblu.jpg
146
+ │ | | └── ...
147
+ │ | └── ...
148
+ │ ├── b
149
+ │ │ └── ...
150
+ │ ├── c
151
+ │ │ └── ...
152
+ │ └── ...
153
+ └── Partitions
154
+ ├── ClassName.txt
155
+ ├── Training_01.txt
156
+ ├── Testing_01.txt
157
+ └── ...
158
+
159
+ Args:
160
+ data_root (str): The root directory for SUN397 dataset.
161
+ split (str, optional): The dataset split, supports "train" and "test".
162
+ Default to "train".
163
+
164
+ Examples:
165
+ >>> from mmpretrain.datasets import SUN397
166
+ >>> train_dataset = SUN397(data_root='data/SUN397', split='train')
167
+ >>> train_dataset
168
+ Dataset SUN397
169
+ Number of samples: 19824
170
+ Number of categories: 397
171
+ Root of dataset: data/SUN397
172
+ >>> test_dataset = SUN397(data_root='data/SUN397', split='test')
173
+ >>> test_dataset
174
+ Dataset SUN397
175
+ Number of samples: 19829
176
+ Number of categories: 397
177
+ Root of dataset: data/SUN397
178
+ """ # noqa: E501
179
+
180
+ METAINFO = {'classes': SUN397_CATEGORIES}
181
+
182
+ def __init__(self, data_root: str, split: str = 'train', **kwargs):
183
+
184
+ splits = ['train', 'test']
185
+ assert split in splits, \
186
+ f"The split must be one of {splits}, but get '{split}'"
187
+ self.split = split
188
+
189
+ self.backend = get_file_backend(data_root, enable_singleton=True)
190
+ if split == 'train':
191
+ ann_file = self.backend.join_path('Partitions', 'Training_01.txt')
192
+ else:
193
+ ann_file = self.backend.join_path('Partitions', 'Testing_01.txt')
194
+
195
+ data_prefix = 'SUN397'
196
+ test_mode = split == 'test'
197
+
198
+ super(SUN397, self).__init__(
199
+ ann_file=ann_file,
200
+ data_root=data_root,
201
+ test_mode=test_mode,
202
+ data_prefix=data_prefix,
203
+ **kwargs)
204
+
205
+ def load_data_list(self):
206
+ pairs = list_from_file(self.ann_file)
207
+ data_list = []
208
+ for pair in pairs:
209
+ if pair in INVALID:
210
+ continue
211
+ img_path = self.backend.join_path(self.img_prefix, pair[1:])
212
+ items = pair.split('/')
213
+ class_name = '_'.join(items[2:-1])
214
+ gt_label = self.METAINFO['classes'].index(class_name)
215
+ info = dict(img_path=img_path, gt_label=gt_label)
216
+ data_list.append(info)
217
+
218
+ return data_list
219
+
220
+ def extra_repr(self) -> List[str]:
221
+ """The extra repr information of the dataset."""
222
+ body = [
223
+ f'Root of dataset: \t{self.data_root}',
224
+ ]
225
+ return body
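The class name is recovered from the relative path itself. A standalone check of the split-and-join logic for a nested category, using a path from the skip list above:

    # Standalone check of the path -> class-name mapping used above.
    pair = '/b/bow_window/indoor/sun_ahsholsagvlrsboa.jpg'
    items = pair.split('/')  # ['', 'b', 'bow_window', 'indoor', 'sun_...jpg']
    class_name = '_'.join(items[2:-1])
    print(class_name)  # bow_window_indoor
    print(pair[1:])    # relative image path joined under the SUN397 prefix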
mmpretrain/datasets/transforms/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.transforms import (CenterCrop, LoadImageFromFile, Normalize,
3
+ RandomFlip, RandomGrayscale, RandomResize, Resize)
4
+
5
+ from mmpretrain.registry import TRANSFORMS
6
+ from .auto_augment import (AutoAugment, AutoContrast, BaseAugTransform,
7
+ Brightness, ColorTransform, Contrast, Cutout,
8
+ Equalize, GaussianBlur, Invert, Posterize,
9
+ RandAugment, Rotate, Sharpness, Shear, Solarize,
10
+ SolarizeAdd, Translate)
11
+ from .formatting import (Collect, NumpyToPIL, PackInputs, PackMultiTaskInputs,
12
+ PILToNumpy, Transpose)
13
+ from .processing import (Albumentations, BEiTMaskGenerator, CleanCaption,
14
+ ColorJitter, EfficientNetCenterCrop,
15
+ EfficientNetRandomCrop, Lighting, RandomCrop,
16
+ RandomErasing, RandomResizedCrop, RandomTranslatePad,
17
+ ResizeEdge, SimMIMMaskGenerator)
18
+ from .wrappers import ApplyToList, MultiView
19
+
20
+ for t in (CenterCrop, LoadImageFromFile, Normalize, RandomFlip,
21
+ RandomGrayscale, RandomResize, Resize):
22
+ TRANSFORMS.register_module(module=t)
23
+
24
+ __all__ = [
25
+ 'NumpyToPIL', 'PILToNumpy', 'Transpose', 'Collect', 'RandomCrop',
26
+ 'RandomResizedCrop', 'Shear', 'Translate', 'Rotate', 'Invert',
27
+ 'ColorTransform', 'Solarize', 'Posterize', 'AutoContrast', 'Equalize',
28
+ 'Contrast', 'Brightness', 'Sharpness', 'AutoAugment', 'SolarizeAdd',
29
+ 'Cutout', 'RandAugment', 'Lighting', 'ColorJitter', 'RandomErasing',
30
+ 'PackInputs', 'Albumentations', 'EfficientNetRandomCrop',
31
+ 'EfficientNetCenterCrop', 'ResizeEdge', 'BaseAugTransform',
32
+ 'PackMultiTaskInputs', 'GaussianBlur', 'BEiTMaskGenerator',
33
+ 'SimMIMMaskGenerator', 'CenterCrop', 'LoadImageFromFile', 'Normalize',
34
+ 'RandomFlip', 'RandomGrayscale', 'RandomResize', 'Resize', 'MultiView',
35
+ 'ApplyToList', 'CleanCaption', 'RandomTranslatePad'
36
+ ]
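With the mmcv transforms registered above, a whole pipeline can be built from config dicts through the shared registry. A hedged usage sketch; the parameter values are illustrative:

    # Build registered transforms from config dicts.
    from mmpretrain.registry import TRANSFORMS

    pipeline_cfg = [
        dict(type='LoadImageFromFile'),
        dict(type='RandomResizedCrop', scale=224),
        dict(type='RandomFlip', prob=0.5, direction='horizontal'),
        dict(type='PackInputs'),
    ]
    pipeline = [TRANSFORMS.build(cfg) for cfg in pipeline_cfg]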