tuandunghcmut committed
Commit 345ee20 · verified · 1 Parent(s): 1c3e162

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .gitattributes +8 -0
  2. LICENSE +21 -0
  3. assets/framework.png +0 -0
  4. assets/teaser.png +0 -0
  5. core/__init__.py +0 -0
  6. core/__pycache__/__init__.cpython-312.pyc +0 -0
  7. core/__pycache__/comm_.cpython-312.pyc +0 -0
  8. core/__pycache__/config.cpython-312.pyc +0 -0
  9. core/__pycache__/distributed_utils.cpython-312.pyc +0 -0
  10. core/__pycache__/make_param_group.cpython-312.pyc +0 -0
  11. core/__pycache__/memory.cpython-312.pyc +0 -0
  12. core/__pycache__/utils.cpython-312.pyc +0 -0
  13. core/clipping.py +92 -0
  14. core/comm_.py +307 -0
  15. core/config.py +600 -0
  16. core/data/__init__.py +0 -0
  17. core/data/__pycache__/__init__.cpython-312.pyc +0 -0
  18. core/data/datasets/__init__.py +15 -0
  19. core/data/datasets/__pycache__/__init__.cpython-312.pyc +0 -0
  20. core/data/datasets/images/__pycache__/image_caption_dataset.cpython-312.pyc +0 -0
  21. core/data/datasets/images/__pycache__/multi_posedataset.cpython-312.pyc +0 -0
  22. core/data/datasets/images/__pycache__/parsing_dataset.cpython-312.pyc +0 -0
  23. core/data/datasets/images/__pycache__/pedattr_dataset.cpython-312.pyc +0 -0
  24. core/data/datasets/images/__pycache__/peddet_dataset_v2.cpython-312.pyc +0 -0
  25. core/data/datasets/images/__pycache__/pos_dataset_dev.cpython-312.pyc +0 -0
  26. core/data/datasets/images/__pycache__/seg_dataset_dev.cpython-312.pyc +0 -0
  27. core/data/datasets/images/__pycache__/smpl_dataset_v2.cpython-312.pyc +0 -0
  28. core/data/datasets/images/image_caption_dataset.py +261 -0
  29. core/data/datasets/images/multi_posedataset.py +413 -0
  30. core/data/datasets/images/parsing_dataset.py +1084 -0
  31. core/data/datasets/images/pedattr_dataset.py +665 -0
  32. core/data/datasets/images/peddet_dataset_v2.py +578 -0
  33. core/data/datasets/images/pos_dataset_dev.py +713 -0
  34. core/data/datasets/images/resources/CHval.odgt +3 -0
  35. core/data/datasets/images/resources/COCO_val2017_detections_AP_H_56_person.json +3 -0
  36. core/data/datasets/images/resources/mpii_gt_val.mat +3 -0
  37. core/data/datasets/images/resources/test_caltech_heavy_1xnew.odgt +0 -0
  38. core/data/datasets/images/seg_data_tools/__init__.py +0 -0
  39. core/data/datasets/images/seg_data_tools/collate.py +143 -0
  40. core/data/datasets/images/seg_data_tools/cv2_aug_transforms.py +889 -0
  41. core/data/datasets/images/seg_data_tools/transforms.py +106 -0
  42. core/data/datasets/images/seg_dataset_dev.py +293 -0
  43. core/data/datasets/images/smpl_data_tools/__pycache__/_smpl.cpython-312.pyc +0 -0
  44. core/data/datasets/images/smpl_data_tools/__pycache__/config_smpl.cpython-312.pyc +0 -0
  45. core/data/datasets/images/smpl_data_tools/__pycache__/image_ops.cpython-312.pyc +0 -0
  46. core/data/datasets/images/smpl_data_tools/__pycache__/tsv_file.cpython-312.pyc +0 -0
  47. core/data/datasets/images/smpl_data_tools/_smpl.py +333 -0
  48. core/data/datasets/images/smpl_data_tools/config_smpl.py +53 -0
  49. core/data/datasets/images/smpl_data_tools/image_ops.py +230 -0
  50. core/data/datasets/images/smpl_data_tools/smpl_modeling/data/J_regressor_extra.npy +3 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ core/data/datasets/images/resources/CHval.odgt filter=lfs diff=lfs merge=lfs -text
+ core/data/datasets/images/resources/COCO_val2017_detections_AP_H_56_person.json filter=lfs diff=lfs merge=lfs -text
+ core/data/datasets/images/resources/mpii_gt_val.mat filter=lfs diff=lfs merge=lfs -text
+ core/solvers/utils/pycocoevalcap/meteor/meteor-1.5.jar filter=lfs diff=lfs merge=lfs -text
+ core/solvers/utils/pycocoevalcap/spice/lib/Meteor-1.5.jar filter=lfs diff=lfs merge=lfs -text
+ core/solvers/utils/pycocoevalcap/spice/lib/guava-19.0.jar filter=lfs diff=lfs merge=lfs -text
+ core/solvers/utils/pycocoevalcap/spice/spice-1.0.jar filter=lfs diff=lfs merge=lfs -text
+ core/solvers/utils/pycocoevalcap/tokenizer/stanford-corenlp-3.4.1.jar filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Shanghai AI Laboratory
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
assets/framework.png ADDED
assets/teaser.png ADDED
core/__init__.py ADDED
File without changes
core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (183 Bytes).
core/__pycache__/comm_.cpython-312.pyc ADDED
Binary file (11.9 kB).
core/__pycache__/config.cpython-312.pyc ADDED
Binary file (26 kB).
core/__pycache__/distributed_utils.cpython-312.pyc ADDED
Binary file (91.8 kB).
core/__pycache__/make_param_group.cpython-312.pyc ADDED
Binary file (4.17 kB).
core/__pycache__/memory.cpython-312.pyc ADDED
Binary file (3.77 kB).
core/__pycache__/utils.cpython-312.pyc ADDED
Binary file (57.9 kB).
core/clipping.py ADDED
@@ -0,0 +1,92 @@
+ import time
+
+ import numpy as np
+ import torch
+
+ from easydict import EasyDict as edict
+
+ import warnings
+ from torch._six import inf
+ from core.utils import sync_print
+
+ # return if any inf/nan
+ # div norm by loss_scale, for 'real' norm
+ # if auto_clipper provided, compute max_norm using auto_clipper
+ # else, use the given max_norm
+ def clip_grad_norm_(parameters, max_norm=1000000, norm_type=2, auto_clipper=None, loss_scale=1.0):
+     if isinstance(parameters, torch.Tensor):
+         parameters = [parameters]
+     parameters = list(filter(lambda p: p[1].grad is not None, parameters))
+
+     if len(parameters) == 0: return None
+
+     max_norm = float(max_norm)
+     norm_type = float(norm_type)
+     if norm_type == inf:
+         total_norm = max(p.grad.data.abs().max() for _, p in parameters)
+     else:
+         total_norm = 0
+         for name, p in parameters:
+             param_norm = p.grad.data.norm(norm_type)
+             total_norm += param_norm.item() ** norm_type
+
+         total_norm = total_norm ** (1. / norm_type)
+
+     # check inf/nan
+     overflow_num = torch.zeros(1)
+     if np.isinf(total_norm) or np.isnan(total_norm):
+         overflow_num[0] = 1
+     torch.distributed.all_reduce(overflow_num)
+
+     if overflow_num > 0:
+         for name, p in parameters:
+             p.grad.data.fill_(float('nan'))
+         sync_print('total_norm is inf({})/nan({}), skip clipping!!!'.format(np.isinf(total_norm), np.isnan(total_norm)))
+         return total_norm
+
+     # rescale the total_norm by loss_scale
+     total_norm /= loss_scale
+
+     # update auto_clipper, compute max_norm
+     if auto_clipper is not None:
+         max_norm = auto_clipper.update(total_norm)
+
+     # do clipping
+     clip_coef = max_norm / (total_norm + 1e-6)
+     if clip_coef < 1:
+         # sync_print('clip_coef: {}'.format(clip_coef))
+         for _, p in parameters:
+             p.grad.data.mul_(clip_coef)
+
+     return total_norm
+
+ class ClipMeter(object):
+     def __init__(self, mom=None, thresh=None, min_max=False, mean=False, init=False):
+         self.thresh = thresh
+         self.mom = mom
+         self.min_max = min_max
+         self.mean = mean
+         self.val = 1.0
+         self.init = init
+
+     def get_mean(self):
+         return self.val
+
+     def get_clip_val(self):
+         if self.mean:
+             return self.get_mean()
+         else:
+             return self.get_mean() * (1+self.thresh)
+
+     def update(self, x):
+         if self.init:
+             self.val = x
+             self.init = False
+         mean = self.get_mean()
+         if self.min_max:
+             x = max(min(x, mean*(1+self.thresh)), mean*(1-self.thresh))
+         else:
+             x = min(x, mean*(1+self.thresh))
+
+         self.val = self.mom * self.val + (1-self.mom)*x
+         return self.get_clip_val()
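A minimal sketch (not part of the commit) of how clip_grad_norm_ and ClipMeter might be wired into a training step. The model, optimizer, and hyper-parameters are made up, and an initialized torch.distributed process group is assumed, since the overflow check all-reduces a flag across ranks:

import torch
from core.clipping import clip_grad_norm_, ClipMeter

model = torch.nn.Linear(16, 4)                          # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# ClipMeter keeps a running mean of the gradient norm and returns
# mean * (1 + thresh) as the adaptive clipping threshold
auto_clipper = ClipMeter(mom=0.9, thresh=0.5, min_max=True, init=True)

def train_step(inputs, targets):
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(inputs), targets)
    loss.backward()
    # clip_grad_norm_ expects (name, parameter) pairs, hence named_parameters()
    total_norm = clip_grad_norm_(list(model.named_parameters()),
                                 auto_clipper=auto_clipper, loss_scale=1.0)
    optimizer.step()
    return loss.item(), total_norm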
core/comm_.py ADDED
@@ -0,0 +1,307 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ """
3
+ This file contains primitives for multi-gpu communication.
4
+ This is useful when doing distributed training.
5
+ """
6
+
7
+ import functools
8
+ import logging
9
+ import numpy as np
10
+ import pickle
11
+ import torch
12
+ import torch.distributed as dist
13
+
14
+ _LOCAL_PROCESS_GROUP = None
15
+
16
+ _CAPTION_GEN_MODE = False
17
+
18
+ temp_dir = TEMP_DIR = './data/temp'
19
+ IDS = 'IDS'
20
+ image_features = 'image_features'
21
+ text_features = 'text_features'
22
+
23
+ old_checkpoint = True
24
+
25
+ """
26
+ A torch process group which only includes processes that are on the same machine as the current process.
27
+ This variable is set when processes are spawned by `launch()` in "engine/launch.py".
28
+ """
29
+
30
+
31
+ def is_dist_avail_and_initialized():
32
+ if not dist.is_available():
33
+ return False
34
+ if not dist.is_initialized():
35
+ return False
36
+ return True
37
+
38
+
39
+ def get_world_size() -> int:
40
+ if not dist.is_available():
41
+ return 1
42
+ if not dist.is_initialized():
43
+ return 1
44
+ return dist.get_world_size()
45
+
46
+
47
+ def get_rank() -> int:
48
+ if not dist.is_available():
49
+ return 0
50
+ if not dist.is_initialized():
51
+ return 0
52
+ return dist.get_rank()
53
+
54
+
55
+ def get_local_rank() -> int:
56
+ """
57
+ Returns:
58
+ The rank of the current process within the local (per-machine) process group.
59
+ """
60
+ if not dist.is_available():
61
+ return 0
62
+ if not dist.is_initialized():
63
+ return 0
64
+ # assert _LOCAL_PROCESS_GROUP is not None
65
+ return dist.get_rank(group=_LOCAL_PROCESS_GROUP)
66
+
67
+
68
+ def get_local_size() -> int:
69
+ """
70
+ Returns:
71
+ The size of the per-machine process group,
72
+ i.e. the number of processes per machine.
73
+ """
74
+ if not dist.is_available():
75
+ return 1
76
+ if not dist.is_initialized():
77
+ return 1
78
+ return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)
79
+
80
+
81
+ def is_main_process() -> bool:
82
+ return get_rank() == 0
83
+
84
+
85
+ def synchronize():
86
+ """
87
+ Helper function to synchronize (barrier) among all processes when
88
+ using distributed training
89
+ """
90
+ if not dist.is_available():
91
+ return
92
+ if not dist.is_initialized():
93
+ return
94
+ world_size = dist.get_world_size()
95
+ if world_size == 1:
96
+ return
97
+ dist.barrier()
98
+
99
+
100
+ @functools.lru_cache()
101
+ def _get_global_gloo_group():
102
+ """
103
+ Return a process group based on the gloo backend, containing all the ranks.
104
+ The result is cached.
105
+ """
106
+ if dist.get_backend() == "nccl":
107
+ return dist.new_group(backend="gloo")
108
+ else:
109
+ return dist.group.WORLD
110
+
111
+
112
+ def _serialize_to_tensor(data, group):
113
+ backend = dist.get_backend(group)
114
+ assert backend in ["gloo", "nccl"]
115
+ device = torch.device("cpu" if backend == "gloo" else "cuda")
116
+
117
+ buffer = pickle.dumps(data)
118
+ if len(buffer) > 1024 ** 3:
119
+ logger = logging.getLogger(__name__)
120
+ logger.warning(
121
+ "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
122
+ get_rank(), len(buffer) / (1024 ** 3), device
123
+ )
124
+ )
125
+ storage = torch.ByteStorage.from_buffer(buffer)
126
+ tensor = torch.ByteTensor(storage).to(device=device)
127
+ return tensor
128
+
129
+
130
+ def _pad_to_largest_tensor(tensor, group):
131
+ """
132
+ Returns:
133
+ list[int]: size of the tensor, on each rank
134
+ Tensor: padded tensor that has the max size
135
+ """
136
+ world_size = dist.get_world_size(group=group)
137
+ assert (
138
+ world_size >= 1
139
+ ), "comm.gather/all_gather must be called from ranks within the given group!"
140
+ local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
141
+ size_list = [
142
+ torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
143
+ ]
144
+ dist.all_gather(size_list, local_size, group=group)
145
+ size_list = [int(size.item()) for size in size_list]
146
+
147
+ max_size = max(size_list)
148
+
149
+ # we pad the tensor because torch all_gather does not support
150
+ # gathering tensors of different shapes
151
+ if local_size != max_size:
152
+ padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device)
153
+ tensor = torch.cat((tensor, padding), dim=0)
154
+ return size_list, tensor
155
+
156
+
157
+ def all_gather(data, group=None):
158
+ """
159
+ Run all_gather on arbitrary picklable data (not necessarily tensors).
160
+ Args:
161
+ data: any picklable object
162
+ group: a torch process group. By default, will use a group which
163
+ contains all ranks on gloo backend.
164
+ Returns:
165
+ list[data]: list of data gathered from each rank
166
+ """
167
+ if get_world_size() == 1:
168
+ return [data]
169
+ if group is None:
170
+ group = _get_global_gloo_group()
171
+ if dist.get_world_size(group) == 1:
172
+ return [data]
173
+
174
+ tensor = _serialize_to_tensor(data, group)
175
+
176
+ size_list, tensor = _pad_to_largest_tensor(tensor, group)
177
+ max_size = max(size_list)
178
+
179
+ # receiving Tensor from all ranks
180
+ tensor_list = [
181
+ torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
182
+ ]
183
+ dist.all_gather(tensor_list, tensor, group=group)
184
+
185
+ data_list = []
186
+ for size, tensor in zip(size_list, tensor_list):
187
+ buffer = tensor.cpu().numpy().tobytes()[:size]
188
+ data_list.append(pickle.loads(buffer))
189
+
190
+ return data_list
191
+
192
+
193
+ def gather(data, dst=0, group=None):
194
+ """
195
+ Run gather on arbitrary picklable data (not necessarily tensors).
196
+ Args:
197
+ data: any picklable object
198
+ dst (int): destination rank
199
+ group: a torch process group. By default, will use a group which
200
+ contains all ranks on gloo backend.
201
+ Returns:
202
+ list[data]: on dst, a list of data gathered from each rank. Otherwise,
203
+ an empty list.
204
+ """
205
+ if get_world_size() == 1:
206
+ return [data]
207
+ if group is None:
208
+ group = _get_global_gloo_group()
209
+ if dist.get_world_size(group=group) == 1:
210
+ return [data]
211
+ rank = dist.get_rank(group=group)
212
+
213
+ tensor = _serialize_to_tensor(data, group)
214
+ size_list, tensor = _pad_to_largest_tensor(tensor, group)
215
+
216
+ # receiving Tensor from all ranks
217
+ if rank == dst:
218
+ max_size = max(size_list)
219
+ tensor_list = [
220
+ torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list
221
+ ]
222
+ dist.gather(tensor, tensor_list, dst=dst, group=group)
223
+
224
+ data_list = []
225
+ for size, tensor in zip(size_list, tensor_list):
226
+ buffer = tensor.cpu().numpy().tobytes()[:size]
227
+ data_list.append(pickle.loads(buffer))
228
+ return data_list
229
+ else:
230
+ dist.gather(tensor, [], dst=dst, group=group)
231
+ return []
232
+
233
+
234
+ def broadcast_object(data, src=0, group=None):
235
+ """
236
+ Run gather on arbitrary picklable data (not necessarily tensors).
237
+ Args:
238
+ data: any picklable object
239
+ dst (int): destination rank
240
+ group: a torch process group. By default, will use a group which
241
+ contains all ranks on gloo backend.
242
+ Returns:
243
+ list[data]: on dst, a list of data gathered from each rank. Otherwise,
244
+ an empty list.
245
+ """
246
+ # if get_world_size() == 1:
247
+ # return data
248
+ # if group is None:
249
+ # group = _get_global_gloo_group()
250
+ # if dist.get_world_size(group=group) == 1:
251
+ # return data
252
+
253
+ if not isinstance(data, list):
254
+ data_list = [data]
255
+ dist.broadcast_object_list(data_list, src=src, group=group)
256
+ return data_list[0]
257
+ else:
258
+ dist.broadcast_object_list(data, src=src, group=group)
259
+ return data
260
+ return data  # unreachable: both branches above already return
261
+
262
+
263
+ def shared_random_seed():
264
+ """
265
+ Returns:
266
+ int: a random number that is the same across all workers.
267
+ If workers need a shared RNG, they can use this shared seed to
268
+ create one.
269
+ All workers must call this function, otherwise it will deadlock.
270
+ """
271
+ ints = np.random.randint(2 ** 31)
272
+ all_ints = all_gather(ints)
273
+ return all_ints[0]
274
+
275
+
276
+ def reduce_dict(input_dict, average=True):
277
+ """
278
+ Reduce the values in the dictionary from all processes so that process with rank
279
+ 0 has the reduced results.
280
+ Args:
281
+ input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
282
+ average (bool): whether to do average or sum
283
+ Returns:
284
+ a dict with the same keys as input_dict, after reduction.
285
+ """
286
+ world_size = get_world_size()
287
+ if world_size < 2:
288
+ return input_dict
289
+ with torch.no_grad():
290
+ names = []
291
+ values = []
292
+ # sort the keys so that they are consistent across processes
293
+ for k in sorted(input_dict.keys()):
294
+ names.append(k)
295
+ values.append(input_dict[k])
296
+ values = torch.stack(values, dim=0)
297
+ dist.reduce(values, dst=0)
298
+ if dist.get_rank() == 0 and average:
299
+ # only main process gets accumulated, so only divide by
300
+ # world_size in this case
301
+ values /= world_size
302
+ reduced_dict = {k: v for k, v in zip(names, values)}
303
+ return reduced_dict
304
+
305
+
306
+ def unwrap_model(model):
307
+ return model.module if hasattr(model, 'module') else model
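A small sketch (not part of the commit) showing typical use of these communication helpers. It spins up a one-process gloo group purely for illustration, so all_gather returns a single-element list and reduce_dict returns its input unchanged; the import path assumes the repository root is on PYTHONPATH:

import os
import torch
import torch.distributed as dist
from core import comm_ as comm   # core/comm_.py

def main():
    # one-process "distributed" setup, purely for illustration
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group(backend="gloo", rank=0, world_size=1)

    # all_gather works on arbitrary picklable objects, not just tensors
    stats = {"rank": comm.get_rank(), "n_samples": 128}
    gathered = comm.all_gather(stats)              # one entry per rank

    # reduce_dict reduces scalar tensors onto rank 0; with a single
    # process it simply returns the input dict unchanged
    losses = {"loss_cls": torch.tensor(0.7), "loss_box": torch.tensor(1.3)}
    reduced = comm.reduce_dict(losses)

    if comm.is_main_process():
        print(gathered, reduced)
    dist.destroy_process_group()

if __name__ == "__main__":
    main()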
core/config.py ADDED
@@ -0,0 +1,600 @@
1
+ import yaml
2
+ import logging
3
+ import numpy as np
4
+ from easydict import EasyDict as edict
5
+ import copy
6
+ import re
7
+ import torch.distributed as dist
8
+
9
+ from .utils import printlog
10
+ from torch.distributed.distributed_c10d import _get_global_rank
11
+
12
+
13
+ task_specific_param = ['backbone', 'neck', 'decoder', 'dataset', 'sampler', 'lr_scheduler', 'optimizer',
14
+ 'extra', 'evaluation', 'model_entry_type', 'load_ignore', 'ckpt_task_id',
15
+ 'patch_neck','patch_adapter', 'patch_proj', 'label_neck', 'label_adapter', 'label_proj',]
16
+
17
+ loader = yaml.SafeLoader
18
+ loader.add_implicit_resolver(
19
+ u'tag:yaml.org,2002:float',
20
+ re.compile(u'''^(?:
21
+ [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
22
+ |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
23
+ |\\.[0-9_]+(?:[eE][-+][0-9]+)?
24
+ |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
25
+ |[-+]?\\.(?:inf|Inf|INF)
26
+ |\\.(?:nan|NaN|NAN))$''', re.X),
27
+ list(u'-+0123456789.'))
28
+
29
+ def flat(nums):
30
+ res = []
31
+ for i in nums:
32
+ if isinstance(i, list):
33
+ res.extend(flat(i))
34
+ else:
35
+ res.append(i)
36
+ return res
37
+
38
+ def specific_group_split_modality_groups(group_spec, share_backbone_group_ids,
39
+ share_decoder_group_ids, share_rgb_group_ids,
40
+ share_video_group_ids, share_dense_labeling_group_ids,
41
+ share_sparse_labeling_group_ids, share_text_group_ids, share_modality_group_ids=None):
42
+ ## sanity check
43
+ assert type(group_spec) is list
44
+ assert all(map(lambda x: type(x) is int, group_spec))
45
+
46
+ num_groups = len(group_spec)
47
+ splits = np.sum(group_spec)
48
+
49
+ if dist.is_initialized():
50
+ world_size = dist.get_world_size()
51
+ rank = dist.get_rank()
52
+ else:
53
+ world_size = 1
54
+ rank = 0
55
+
56
+ assert world_size % splits == 0, f"{world_size} % {splits}"
57
+ unit = int(world_size / splits)
58
+
59
+ ## split
60
+ group_sizes = [x*unit for x in group_spec] # [8,8,8] / [32, 16]
61
+ groups = []
62
+ roots = []
63
+ last = 0
64
+ task_info = edict()
65
+ all_ranks = []
66
+
67
+ for i,gs in enumerate(group_sizes):
68
+ ranks = list(map(int, np.arange(last, last+gs))) #[0...8], [9...15], ...
69
+ groups.append(dist.new_group(ranks=ranks))
70
+ roots.append(last) # 0, 8, 16
71
+ all_ranks.append(ranks)
72
+ if rank in ranks: # if current gpu rank in traversed rank task group
73
+ printlog(f">> task_info.group[{i}] ranks {ranks}")
74
+ task_info.group = groups[-1] # subordinate to what group
75
+ task_info.task_size = gs # 8
76
+ task_info.task_id = i
77
+ task_info.task_rank = rank - last
78
+ task_info.task_root_rank = last
79
+ last += gs
80
+ task_info.root_group = dist.new_group(ranks=roots)
81
+ printlog(f">> task_info.root_group ranks {roots}")
82
+ task_info.task_sizes = group_sizes
83
+ task_info.task_root_ranks = roots
84
+ task_info.task_num = num_groups
85
+
86
+ ## share_backbone_group spec
87
+ if share_backbone_group_ids is not None: # *[0,0,0]*(default) | [0,1,0]task ids
88
+ # group size must equal within a share_group
89
+ backboneshareid2idx = {}
90
+ for idx, this_id in enumerate(share_backbone_group_ids):
91
+ if this_id not in backboneshareid2idx:
92
+ backboneshareid2idx[this_id] = list()
93
+ backboneshareid2idx[this_id].append(idx) # {0: [0,1,2]}| {0: [0,2], 1: [1]}
94
+
95
+ ## create backbone share group
96
+ for idxs in backboneshareid2idx.values(): # idxs = [0, 1, 2]
97
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
98
+ this_share_group = dist.new_group(ranks=this_group_ranks)
99
+ this_group_size = len(this_group_ranks)
100
+ if rank in this_group_ranks:
101
+ task_info.backbone_share_group = this_share_group
102
+ printlog(f">> task_info.backbone_share_group[{idxs}] ranks {this_group_ranks}")
103
+ task_info.backbone_group_size = len(backboneshareid2idx)
104
+ task_info.backbone_task_size = len(backboneshareid2idx) * this_group_size
105
+ task_info.backbone_task_rank = np.sum(rank < np.array(this_group_ranks))
106
+
107
+ ## share_decoder_group spec
108
+ if share_decoder_group_ids is not None:
109
+ # group size must equal within a share_group
110
+ decodershareid2idx = {}
111
+ for idx, this_id in enumerate(share_decoder_group_ids):
112
+ if this_id not in decodershareid2idx:
113
+ decodershareid2idx[this_id] = list()
114
+ decodershareid2idx[this_id].append(idx)
115
+
116
+ ## create decoder share group
117
+ for idxs in decodershareid2idx.values():
118
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
119
+ this_share_group = dist.new_group(ranks=this_group_ranks)
120
+ this_group_size = len(this_group_ranks)
121
+ if rank in this_group_ranks:
122
+ task_info.decoder_share_group = this_share_group
123
+ printlog(f">> task_info.decoder_share_group[{idxs}] ranks {this_group_ranks}")
124
+ task_info.decoder_group_size = len(decodershareid2idx)
125
+ task_info.decoder_task_size = len(decodershareid2idx) * this_group_size
126
+ task_info.decoder_task_rank = np.sum(rank < np.array(this_group_ranks))
127
+
128
+
129
+ # Now, only for sparse labeling to deal with the modality sharing problem,
130
+ # which is not a good solution, but it works.
131
+ # parameters that have grads in [0,1,2] are in modality share group,
132
+ # parameters that do not have grads in [3,4] should be set in the task-specific group.
133
+ if share_modality_group_ids is not None:
134
+ # group size must equal within a share_group
135
+ modalityshareid2idx = {}
136
+ for idx, this_id in enumerate(share_modality_group_ids):
137
+ # -1 denotes that this modality does not appear in the current task
138
+ # if this_id == -1:
139
+ # continue
140
+ if this_id not in modalityshareid2idx:
141
+ modalityshareid2idx[this_id] = list()
142
+ modalityshareid2idx[this_id].append(idx)
143
+
144
+ ## create modality share group
145
+ for idxs in modalityshareid2idx.values(): # 0: [1,2] 1: [3]
146
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
147
+ this_share_group = dist.new_group(ranks=this_group_ranks)
148
+ this_group_size = len(this_group_ranks) # 2
149
+ if rank in this_group_ranks:
150
+ task_info.modality_share_group = this_share_group
151
+ printlog(f">> task_info.modality_share_group[{idxs}] ranks {this_group_ranks}")
152
+ task_info.modality_group_size = len(modalityshareid2idx)
153
+
154
+ if share_rgb_group_ids is not None:
155
+ # group size must equal within a share_group
156
+ rgbshareid2idx = {}
157
+ for idx, this_id in enumerate(share_rgb_group_ids):
158
+ # -1 denotes that this modality does not appear in the current task
159
+ # if this_id == -1:
160
+ # continue
161
+ if this_id not in rgbshareid2idx:
162
+ rgbshareid2idx[this_id] = list()
163
+ rgbshareid2idx[this_id].append(idx)
164
+
165
+ ## create rgb share group
166
+ for idxs in rgbshareid2idx.values(): # 0: [1,2] 1: [3]
167
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
168
+ this_share_group = dist.new_group(ranks=this_group_ranks)
169
+ this_group_size = len(this_group_ranks) # 2
170
+ if rank in this_group_ranks:
171
+ task_info.rgb_share_group = this_share_group
172
+ printlog(f">> task_info.rgb_share_group[{idxs}] ranks {this_group_ranks}")
173
+ task_info.rgb_group_size = len(rgbshareid2idx)
174
+ # task_info.rgb_task_size = len(rgbshareid2idx) * this_group_size
175
+ # task_info.rgb_task_rank = np.sum(rank < np.array(this_group_ranks))
176
+ # all_group_ranks = flat(rgbshareid2idx.values())
177
+ # if not len(rgbshareid2idx.values()) or dist.get_rank() not in all_group_ranks:
178
+ # task_info.rgb_share_group = None
179
+
180
+ if share_dense_labeling_group_ids is not None:
181
+ # group size must equal within a share_group
182
+ dense_labelingshareid2idx = {}
183
+ for idx, this_id in enumerate(share_dense_labeling_group_ids):
184
+ # -1 denotes that this modality does not appear in the current task
185
+ # if this_id == -1:
186
+ # continue
187
+ if this_id not in dense_labelingshareid2idx:
188
+ dense_labelingshareid2idx[this_id] = list()
189
+ dense_labelingshareid2idx[this_id].append(idx)
190
+
191
+ ## create dense share group
192
+ for idxs in dense_labelingshareid2idx.values(): # 0: [1,2] 1: [3]
193
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
194
+ this_share_group = dist.new_group(ranks=this_group_ranks)
195
+ this_group_size = len(this_group_ranks) # 2
196
+ if rank in this_group_ranks:
197
+ task_info.dense_labeling_share_group = this_share_group
198
+ printlog(f">> task_info.dense_labeling_share_group[{idxs}] ranks {this_group_ranks}")
199
+ task_info.dense_labeling_group_size = len(dense_labelingshareid2idx)
200
+
201
+
202
+ if share_sparse_labeling_group_ids is not None:
203
+ # group size must equal within a share_group
204
+ sparse_labelingshareid2idx = {}
205
+ for idx, this_id in enumerate(share_sparse_labeling_group_ids):
206
+ # -1 denotes that this modality does not appear in the current task
207
+ # if this_id == -1:
208
+ # continue
209
+ if this_id not in sparse_labelingshareid2idx:
210
+ sparse_labelingshareid2idx[this_id] = list()
211
+ sparse_labelingshareid2idx[this_id].append(idx)
212
+
213
+ ## create sparse share group
214
+ for idxs in sparse_labelingshareid2idx.values(): # 0: [1,2] 1: [3]
215
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
216
+ this_share_group = dist.new_group(ranks=this_group_ranks)
217
+ this_group_size = len(this_group_ranks) # 2
218
+ if rank in this_group_ranks:
219
+ task_info.sparse_labeling_share_group = this_share_group
220
+ printlog(f">> task_info.sparse_labeling_share_group[{idxs}] ranks {this_group_ranks}")
221
+ task_info.sparse_labeling_group_size = len(sparse_labelingshareid2idx)
222
+
223
+
224
+ if share_text_group_ids is not None:
225
+ # group size must equal within a share_group
226
+ textshareid2idx = {}
227
+ for idx, this_id in enumerate(share_text_group_ids):
228
+ # -1 denotes that this modality does not appear in the current task
229
+ if this_id not in textshareid2idx:
230
+ textshareid2idx[this_id] = list()
231
+ textshareid2idx[this_id].append(idx)
232
+
233
+ ## create text share group
234
+ for idxs in textshareid2idx.values(): # 0: [1,2] 1: [3]
235
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
236
+ this_share_group = dist.new_group(ranks=this_group_ranks)
237
+ this_group_size = len(this_group_ranks) # 2
238
+ if rank in this_group_ranks:
239
+ task_info.text_share_group = this_share_group
240
+ printlog(f">> task_info.text_share_group[{idxs}] ranks {this_group_ranks}")
241
+ task_info.text_group_size = len(textshareid2idx)
242
+
243
+
244
+ if share_video_group_ids is not None:
245
+ # group size must equal within a share_group
246
+ videoshareid2idx = {}
247
+ for idx, this_id in enumerate(share_video_group_ids):
248
+ # -1 denotes that this modality does not appear in the current task
249
+ # if this_id == -1:
250
+ # continue
251
+ if this_id not in videoshareid2idx:
252
+ videoshareid2idx[this_id] = list()
253
+ videoshareid2idx[this_id].append(idx)
254
+
255
+ ## create video share group
256
+ for idxs in videoshareid2idx.values(): # 0: [1,2] 1: [3]
257
+ this_group_ranks = flat([all_ranks[i] for i in idxs]) # 1 2
258
+ this_share_group = dist.new_group(ranks=this_group_ranks)
259
+ this_group_size = len(this_group_ranks) # 2
260
+ if rank in this_group_ranks:
261
+ task_info.video_share_group = this_share_group
262
+ printlog(f">> task_info.video_share_group[{idxs}] ranks {this_group_ranks}")
263
+ task_info.video_group_size = len(videoshareid2idx)
264
+
265
+ return task_info
266
+
267
+ def specific_group_split(group_spec, share_backbone_group_ids, \
268
+ share_neck_group_ids, share_decoder_group_ids, share_adapter_group_ids):
269
+ ## sanity check
270
+ assert type(group_spec) is list
271
+ assert all(map(lambda x: type(x) is int, group_spec))
272
+
273
+ num_groups = len(group_spec)
274
+ splits = np.sum(group_spec)
275
+
276
+ world_size = dist.get_world_size()
277
+ rank = dist.get_rank()
278
+
279
+ assert world_size % splits == 0, f"{world_size} % {splits}"
280
+ unit = int(world_size / splits)
281
+
282
+ ## split
283
+ group_sizes = [x*unit for x in group_spec] # [8,8,8] / [32, 16]
284
+ groups = []
285
+ roots = []
286
+ last = 0
287
+ task_info = edict()
288
+ all_ranks = []
289
+ # import pdb;
290
+ # pdb.set_trace()
291
+ for i,gs in enumerate(group_sizes):
292
+ ranks = list(map(int, np.arange(last, last+gs))) #[0...8], [9...15], ...
293
+ groups.append(dist.new_group(ranks=ranks))
294
+ roots.append(last) # 0, 8, 16
295
+ all_ranks.append(ranks)
296
+ if rank in ranks: # if current gpu rank in traversed rank task group
297
+ printlog(f">> task_info.group[{i}] ranks {ranks}")
298
+ task_info.group = groups[-1] # subordinate to what group
299
+ task_info.task_size = gs # 8
300
+ task_info.task_id = i
301
+ task_info.task_rank = rank - last
302
+ task_info.task_root_rank = last
303
+ last += gs
304
+ task_info.root_group = dist.new_group(ranks=roots)
305
+ printlog(f">> task_info.root_group ranks {roots}")
306
+ task_info.task_sizes = group_sizes
307
+ task_info.task_root_ranks = roots
308
+ task_info.task_num = num_groups
309
+ # pdb.set_trace()
310
+ ## share_backbone_group spec
311
+ if share_backbone_group_ids is not None: # *[0,0,0]*(default) | [0,1,0]task ids
312
+ # group size must equal within a share_group
313
+ backboneshareid2idx = {}
314
+ for idx, this_id in enumerate(share_backbone_group_ids):
315
+ if this_id not in backboneshareid2idx:
316
+ backboneshareid2idx[this_id] = list()
317
+ backboneshareid2idx[this_id].append(idx) # {0: [0,1,2]}| {0: [0,2], 1: [1]}
318
+
319
+ ## create backbone share group
320
+ for idxs in backboneshareid2idx.values(): # idxs = [0, 1, 2]
321
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
322
+ this_share_group = dist.new_group(ranks=this_group_ranks)
323
+ this_group_size = len(this_group_ranks)
324
+ if rank in this_group_ranks:
325
+ task_info.backbone_share_group = this_share_group
326
+ printlog(f">> task_info.backbone_share_group[{idxs}] ranks {this_group_ranks}")
327
+ task_info.backbone_group_size = len(backboneshareid2idx)
328
+ task_info.backbone_task_size = len(backboneshareid2idx) * this_group_size
329
+ task_info.backbone_task_rank = np.sum(rank < np.array(this_group_ranks))
330
+ ## share_adapter_group spec
331
+ if share_adapter_group_ids is not None: # *[0,0,0]*(default) | [0,1,0]task ids
332
+ # group size must equal within a share_group
333
+ adaptershareid2idx = {}
334
+ for idx, this_id in enumerate(share_adapter_group_ids):
335
+ if this_id not in adaptershareid2idx:
336
+ adaptershareid2idx[this_id] = list()
337
+ adaptershareid2idx[this_id].append(idx) # {0: [0,1,2]}| {0: [0,2], 1: [1]}
338
+
339
+ ## create adapter share group
340
+ for idxs in adaptershareid2idx.values(): # idxs = [0, 1, 2]
341
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
342
+ this_share_group = dist.new_group(ranks=this_group_ranks)
343
+ this_group_size = len(this_group_ranks)
344
+ if rank in this_group_ranks:
345
+ task_info.adapter_share_group = this_share_group
346
+ printlog(f">> task_info.adapter_share_group[{idxs}] ranks {this_group_ranks}")
347
+ task_info.adapter_group_size = len(adaptershareid2idx)
348
+ task_info.adapter_task_size = len(adaptershareid2idx) * this_group_size
349
+ task_info.adapter_task_rank = np.sum(rank < np.array(this_group_ranks))
350
+
351
+ # pdb.set_trace()
352
+ ## share_neck_group spec
353
+ if share_neck_group_ids is not None:
354
+ # group size must equal within a share_group
355
+ neckshareid2idx = {}
356
+ for idx, this_id in enumerate(share_neck_group_ids):
357
+ if this_id not in neckshareid2idx:
358
+ neckshareid2idx[this_id] = list()
359
+ neckshareid2idx[this_id].append(idx)
360
+
361
+ ## create neck share group
362
+ for idxs in neckshareid2idx.values():
363
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
364
+ this_share_group = dist.new_group(ranks=this_group_ranks)
365
+ this_group_size = len(this_group_ranks)
366
+ if rank in this_group_ranks:
367
+ task_info.neck_share_group = this_share_group
368
+ printlog(f">> task_info.neck_share_group[{idxs}] ranks {this_group_ranks}")
369
+ task_info.neck_group_size = len(neckshareid2idx)
370
+ task_info.neck_task_size = len(neckshareid2idx) * this_group_size
371
+ task_info.neck_task_rank = np.sum(rank < np.array(this_group_ranks))
372
+
373
+ ## share_decoder_group spec
374
+ if share_decoder_group_ids is not None:
375
+ # group size must equal within a share_group
376
+ decodershareid2idx = {}
377
+ for idx, this_id in enumerate(share_decoder_group_ids):
378
+ if this_id not in decodershareid2idx:
379
+ decodershareid2idx[this_id] = list()
380
+ decodershareid2idx[this_id].append(idx)
381
+
382
+ ## create decoder share group
383
+ for idxs in decodershareid2idx.values():
384
+ this_group_ranks = flat([all_ranks[i] for i in idxs])
385
+ this_share_group = dist.new_group(ranks=this_group_ranks)
386
+ this_group_size = len(this_group_ranks)
387
+ if rank in this_group_ranks:
388
+ task_info.decoder_share_group = this_share_group
389
+ printlog(f">> task_info.decoder_share_group[{idxs}] ranks {this_group_ranks}")
390
+ task_info.decoder_group_size = len(decodershareid2idx)
391
+ task_info.decoder_task_size = len(decodershareid2idx) * this_group_size
392
+ task_info.decoder_task_rank = np.sum(rank < np.array(this_group_ranks))
393
+ return task_info
394
+
395
+ class Config(object):
396
+
397
+ def __init__(self, config_file, noginfo=False, spec_ginfo_index=None):
398
+
399
+ with open(config_file) as f:
400
+ config = yaml.load(f, Loader=loader)
401
+ # print('config',config)
402
+ self.config_path = config_file
403
+
404
+ world_size = dist.get_world_size()
405
+ rank = dist.get_rank()
406
+
407
+ if noginfo:
408
+ ginfo = None
409
+ else: # cherrypick from tasks
410
+ tasks = config['tasks']
411
+ num_tasks = len(tasks)
412
+ if spec_ginfo_index is not None:
413
+ assert spec_ginfo_index < len(tasks), \
414
+ 'spec_ginfo_index={} is larger than num_tasks={}'.format(spec_ginfo_index, len(tasks))
415
+ tmp_config = copy.deepcopy(config)
416
+ config['tasks'] = dict()
417
+ config['tasks'][0] = tmp_config['tasks'][spec_ginfo_index]
418
+ config['tasks'][0]['gres_ratio'] = 1
419
+ tasks = config['tasks']
420
+ num_tasks = len(tasks)
421
+
422
+ # parse task_common and assign to each task
423
+ task_common = config.get('task_common', None)
424
+ if task_common is not None:
425
+ for i in range(num_tasks):
426
+ for k,v in task_common.items():
427
+ if not k in tasks[i]:
428
+ printlog('setting {} to {} for task {}'.format(k, v, i))
429
+ tasks[i][k] = v
430
+
431
+ group_spec = [tasks[i].get('gres_ratio',1) for i in range(num_tasks)]
432
+
433
+ ## share group spec
434
+ if config['common'].get('share_backbone_group', False):
435
+ share_backbone_group_ids = config['common']['share_backbone_group'][:num_tasks]
436
+ else:
437
+ share_backbone_group_ids = [0 for i in range(num_tasks)] # hardcoded prior
438
+ if config['common'].get('share_adapter_group', False):
439
+ if len(config['common']['share_adapter_group']) == 1:
440
+ adapter_list = []
441
+ share_adapter_group_ids = config['common']['share_adapter_group'][:num_tasks]
442
+ else:
443
+ share_adapter_group_ids = [0 for i in range(num_tasks)] # hardcoded prior
444
+
445
+ if config['common'].get('share_neck_group', False):
446
+ share_neck_group_ids = config['common']['share_neck_group'][:num_tasks]
447
+ else:
448
+ share_neck_group_ids = [0 for i in range(num_tasks)] # hardcoded prior
449
+
450
+ if config['common'].get('share_decoder_group', False):
451
+ share_decoder_group_ids = config['common']['share_decoder_group'][:num_tasks]
452
+ else:
453
+ share_decoder_group_ids = [i for i in range(num_tasks)] # hardcoded prior
454
+ ginfo = specific_group_split(group_spec, share_backbone_group_ids, share_neck_group_ids,
455
+ share_decoder_group_ids, share_adapter_group_ids)
456
+ loss_weight_sum = float(np.sum(np.array([task['loss_weight'] for task in tasks.values()])))
457
+ ginfo.task_name = tasks[ginfo.task_id]['name']
458
+ ginfo.task_names = [tasks[i]['name'] for i in range(ginfo.task_num)]
459
+ ginfo.task_weight = float(tasks[ginfo.task_id]['loss_weight']) / loss_weight_sum
460
+ ginfo.task_type = tasks[ginfo.task_id].get('type', 'normal')
461
+ ginfo.task_types = [tasks[i].get('type', 'normal') for i in range(ginfo.task_num)]
462
+ ginfo.task_random_seed = tasks[ginfo.task_id].get('random_seed', 0)
463
+
464
+ for p in task_specific_param:
465
+ if p in config['tasks'][ginfo.task_id]:
466
+ config['common'][p] = config['tasks'][ginfo.task_id][p]
467
+ printlog('{} of task{} has been overridden to {}'.format(p, ginfo.task_id, config['common'][p]))
468
+
469
+ logger = logging.getLogger('global_logger')
470
+
471
+ self.world_size = world_size
472
+ self.rank = rank
473
+ self.ginfo = ginfo
474
+ self.config = config
475
+ self.config_file = config_file
476
+
477
+ class Config_Hulk(object):
478
+
479
+ def __init__(self, config_file, noginfo=False, spec_ginfo_index=None):
480
+
481
+ with open(config_file) as f:
482
+ config = yaml.load(f, Loader=loader)
483
+ # print('config',config)
484
+ self.config_path = config_file
485
+
486
+
487
+ if dist.is_initialized():
488
+ world_size = dist.get_world_size()
489
+ rank = dist.get_rank()
490
+ else:
491
+ world_size = 1
492
+ rank = 0
493
+
494
+ if noginfo:
495
+ ginfo = None
496
+ else: # cherrypick from tasks
497
+ tasks = config['tasks']
498
+ num_tasks = len(tasks)
499
+ if spec_ginfo_index is not None:
500
+ assert spec_ginfo_index < len(tasks), \
501
+ 'spec_ginfo_index={} is larger than num_tasks={}'.format(spec_ginfo_index, len(tasks))
502
+ tmp_config = copy.deepcopy(config)
503
+ config['tasks'] = dict()
504
+ config['tasks'][0] = tmp_config['tasks'][spec_ginfo_index]
505
+ config['tasks'][0]['gres_ratio'] = 1
506
+ tasks = config['tasks']
507
+ num_tasks = len(tasks)
508
+
509
+ # parse task_common and assign to each task
510
+ task_common = config.get('task_common', None)
511
+ if task_common is not None:
512
+ for i in range(num_tasks):
513
+ for k,v in task_common.items():
514
+ if not k in tasks[i]:
515
+ printlog('setting {} to {} for task {}'.format(k, v, i))
516
+ tasks[i][k] = v
517
+
518
+ group_spec = [tasks[i].get('gres_ratio',1) for i in range(num_tasks)]
519
+
520
+ ## share group spec
521
+ if config['common'].get('share_backbone_group', False):
522
+ share_backbone_group_ids = config['common']['share_backbone_group'][:num_tasks]
523
+ else:
524
+ share_backbone_group_ids = [0 for i in range(num_tasks)] # hardcoded prior
525
+
526
+
527
+ if config['common'].get('share_decoder_group', False):
528
+ share_decoder_group_ids = config['common']['share_decoder_group'][:num_tasks]
529
+ else:
530
+ share_decoder_group_ids = [i for i in range(num_tasks)] # hardcoded prior
531
+
532
+ # use modality groups to control the communication of neck, adapter, and output proj
533
+
534
+ if config['common'].get('share_rgb_group', False):
535
+ share_rgb_group_ids = config['common']['share_rgb_group'][:num_tasks]
536
+ else:
537
+ share_rgb_group_ids = [i for i in range(num_tasks)] # hardcoded prior
538
+
539
+ if config['common'].get('share_dense_labeling_group', False):
540
+ share_dense_labeling_group_ids = config['common']['share_dense_labeling_group'][:num_tasks]
541
+ else:
542
+ share_dense_labeling_group_ids = [i for i in range(num_tasks)]
543
+
544
+ if config['common'].get('share_sparse_labeling_group', False):
545
+ share_sparse_labeling_group_ids = config['common']['share_sparse_labeling_group'][:num_tasks]
546
+ else:
547
+ share_sparse_labeling_group_ids = [i for i in range(num_tasks)]
548
+
549
+ if config['common'].get('share_text_group', False):
550
+ share_text_group_ids = config['common']['share_text_group'][:num_tasks]
551
+ else:
552
+ share_text_group_ids = [i for i in range(num_tasks)]
553
+
554
+ if config['common'].get('share_video_group', False):
555
+ share_video_group_ids = config['common']['share_video_group'][:num_tasks]
556
+ else:
557
+ share_video_group_ids = [i for i in range(num_tasks)]
558
+
559
+ if config['common'].get('share_modality_group', False):
560
+ share_modality_group_ids = config['common']['share_modality_group'][:num_tasks]
561
+ else:
562
+ share_modality_group_ids = [i for i in range(num_tasks)]
563
+
564
+ # ginfo = specific_group_split_modality_groups(group_spec, share_backbone_group_ids,
565
+ # share_decoder_group_ids, share_rgb_group_ids,
566
+ # share_video_group_ids, share_dense_labeling_group_ids,
567
+ # share_sparse_labeling_group_ids, share_text_group_ids,
568
+ # share_modality_group_ids)
569
+ import easydict
570
+ ginfo = easydict.EasyDict()
571
+ ginfo.task_id = 5
572
+ ginfo.task_num = 5
573
+ ginfo.backbone_share_group = None
574
+ ginfo.task_rank = 0
575
+
576
+ loss_weight_sum = float(np.sum(np.array([task['loss_weight'] for task in tasks.values()])))
577
+ ginfo.task_name = tasks[ginfo.task_id]['name']
578
+ ginfo.task_names = [tasks[i]['name'] for i in range(ginfo.task_num)]
579
+ # ginfo.task_weight = float(tasks[ginfo.task_id]['loss_weight']) / loss_weight_sum
580
+ ginfo.task_weight = float(tasks[ginfo.task_id]['loss_weight'])
581
+ ginfo.task_type = tasks[ginfo.task_id].get('type', 'normal')
582
+ ginfo.task_types = [tasks[i].get('type', 'normal') for i in range(ginfo.task_num)]
583
+ ginfo.task_random_seed = tasks[ginfo.task_id].get('random_seed', 0)
584
+
585
+ for p in task_specific_param:
586
+ if p in config['tasks'][ginfo.task_id]:
587
+ config['common'][p] = config['tasks'][ginfo.task_id][p]
588
+ printlog('{} of task{} has been overridden to {}'.format(p, ginfo.task_id, config['common'][p]))
589
+
590
+ logger = logging.getLogger('global_logger')
591
+
592
+ self.world_size = world_size
593
+ self.rank = rank
594
+ self.ginfo = ginfo
595
+ self.config = config
596
+ self.config_file = config_file
597
+
598
+ # def __repr__(self) -> str:
599
+ # return str(self.config)
600
+
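For orientation, a sketch (not part of the commit) of the YAML layout that Config / Config_Hulk consume, limited to keys actually referenced above; the task names, loss weights, and backbone value are invented. It also mirrors how task_common entries and gres_ratio are folded into each task:

import yaml
import numpy as np

cfg_text = """
common:
  share_backbone_group: [0, 0]      # both tasks share one backbone group
  share_decoder_group:  [0, 1]      # decoders stay task-specific
task_common:
  random_seed: 233
tasks:
  0: {name: pose,    loss_weight: 1.0, gres_ratio: 1, backbone: vit_base}
  1: {name: parsing, loss_weight: 0.5, gres_ratio: 1}
"""
config = yaml.safe_load(cfg_text)
tasks = config["tasks"]

# task_common entries are copied into every task that does not override them
for i, task in tasks.items():
    for k, v in config["task_common"].items():
        task.setdefault(k, v)

# gres_ratio decides how many GPU "units" each task group receives
group_spec = [tasks[i].get("gres_ratio", 1) for i in range(len(tasks))]
loss_weight_sum = float(np.sum([t["loss_weight"] for t in tasks.values()]))
print(group_spec, loss_weight_sum)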
core/data/__init__.py ADDED
File without changes
core/data/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (188 Bytes).
core/data/datasets/__init__.py ADDED
@@ -0,0 +1,15 @@
+ from .images.pedattr_dataset import MultiAttrDataset
+ from .images.pos_dataset_dev import COCOPosDatasetDev, MPIIPosDatasetDev
+ from .images.parsing_dataset import (Human3M6ParsingDataset, LIPParsingDataset, CIHPParsingDataset, ATRParsingDataset,
+                                      DeepFashionParsingDataset, VIPParsingDataset, ModaNetParsingDataset,
+                                      PaperDollParsingDataset)
+ from .images.multi_posedataset import MultiPoseDatasetDev
+ from .images.peddet_dataset_v2 import PedestrainDetectionDataset_v2, PedestrainDetectionDataset_v2demo
+ from .images.image_caption_dataset import CocoCaption, CocoCaptiondemo
+ from .sequences.skeleton_action_dataset import mmSkeletonDataset
+ from .images.smpl_dataset_v2 import MeshTSVYamlDataset
+ from core.utils import printlog
+
+ def dataset_entry(config):
+     printlog('config[kwargs]', config['kwargs'])
+     return globals()[config['type']](**config['kwargs'])
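dataset_entry is a name-based factory: it looks the requested class up in this module's globals() and forwards the configured kwargs. A self-contained sketch of the same pattern (DummyDataset and its arguments are hypothetical stand-ins for the real dataset classes and their YAML-provided kwargs):

class DummyDataset:
    def __init__(self, root, split='train'):
        self.root, self.split = root, split

def dataset_entry(config):
    # same dispatch as core/data/datasets/__init__.py, minus the printlog call
    return globals()[config['type']](**config['kwargs'])

ds = dataset_entry({'type': 'DummyDataset',
                    'kwargs': {'root': '/tmp/data', 'split': 'train'}})
print(type(ds).__name__, ds.root, ds.split)   # DummyDataset /tmp/data train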
core/data/datasets/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.41 kB).
core/data/datasets/images/__pycache__/image_caption_dataset.cpython-312.pyc ADDED
Binary file (13.9 kB).
core/data/datasets/images/__pycache__/multi_posedataset.cpython-312.pyc ADDED
Binary file (18.9 kB).
core/data/datasets/images/__pycache__/parsing_dataset.cpython-312.pyc ADDED
Binary file (40.6 kB).
core/data/datasets/images/__pycache__/pedattr_dataset.cpython-312.pyc ADDED
Binary file (34.7 kB).
core/data/datasets/images/__pycache__/peddet_dataset_v2.cpython-312.pyc ADDED
Binary file (28.9 kB).
core/data/datasets/images/__pycache__/pos_dataset_dev.cpython-312.pyc ADDED
Binary file (33.8 kB).
core/data/datasets/images/__pycache__/seg_dataset_dev.cpython-312.pyc ADDED
Binary file (16.2 kB).
core/data/datasets/images/__pycache__/smpl_dataset_v2.cpython-312.pyc ADDED
Binary file (18 kB).
core/data/datasets/images/image_caption_dataset.py ADDED
@@ -0,0 +1,261 @@
1
+ import os
2
+ import re
3
+ import json
4
+ import random
5
+ import torch
6
+ import torchvision
7
+ import numpy as np
8
+ import pandas as pd
9
+ import os.path as osp
10
+ from PIL import Image
11
+ from collections import defaultdict
12
+ from transformers import BertTokenizer
13
+ from torch.utils.data import Dataset
14
+ import torch.distributed as dist
15
+ from torchvision import transforms
16
+ from torchvision.transforms import PILToTensor, ToTensor
17
+ from core.data.transforms.caption_transforms import RandomAugment
18
+
19
+ def pre_caption(caption, max_words=30):
20
+ caption = re.sub(
21
+ r"([.!\"()*#:;~])",
22
+ ' ',
23
+ caption.lower(),
24
+ )
25
+ caption = re.sub(
26
+ r"\s{2,}",
27
+ ' ',
28
+ caption,
29
+ )
30
+ caption = caption.rstrip('\n')
31
+ caption = caption.strip(' ')
32
+
33
+ #truncate caption
34
+ caption_words = caption.split(' ')
35
+ if len(caption_words) > max_words:
36
+ caption = ' '.join(caption_words[ :max_words])
37
+
38
+ return caption
39
+
40
+ def data_transforms(split_type='train', img_size=384, min_scale=0.5):
41
+ if split_type == 'train':
42
+ data_transforms = transforms.Compose([
43
+ transforms.RandomResizedCrop(img_size, scale=(min_scale, 1.0), interpolation=Image.BICUBIC),
44
+ transforms.RandomHorizontalFlip(),
45
+ RandomAugment(2, 5, isPIL=True,
46
+ augs=['Identity','AutoContrast','Brightness','Sharpness','Equalize',
47
+ 'ShearX', 'ShearY', 'TranslateX', 'TranslateY', 'Rotate']),
48
+ PILToTensor()
49
+ # ToTensor()
50
+ ])
51
+ else:
52
+ data_transforms = transforms.Compose([
53
+ transforms.Resize((img_size, img_size), interpolation=Image.BICUBIC),
54
+ PILToTensor(),
55
+ # ToTensor()
56
+ ])
57
+ return data_transforms
58
+
59
+
60
+ class CocoCaption(Dataset):
61
+ """
62
+ Implementation of the dataloader for coco_caption.
63
+ Mainly used in the model training and evaluation.
64
+ Params:
65
+ ginfo: group information for Multitask learning.
66
+ coco_root: root path of coco2014 dataset.
67
+ anno_root: annotation path of coco captions.
68
+ bert_dir: path of bert-base-uncased for loading tokenizer.
69
+ max_words: max length of input captions.
70
+ img_size: image size.
71
+ prompt: given prompt to add before captions.
72
+ """
73
+ def __init__(self, ginfo, max_words=30, img_size=384, beam_size=1, prompt='', split_type='train',
74
+ cuhk_peds=False, cuhk_peds_root=None, cuhk_peds_anno_root=None, cuhk_peds_gt_root=None,
75
+ joint_train=False, synth_peds_root=None, joint_train_anno_root=None, coco_train=False,
76
+ coco_root=None, anno_root=None, bert_dir='', mals_root=None, luperson_root=None):
77
+ self.task_name = ginfo.task_name
78
+ self.rank = dist.get_rank()
79
+ self.prompt = prompt
80
+
81
+ # plus one for bos token
82
+ self.max_words = max_words + 1
83
+ self.img_size = img_size
84
+ self.split_type = split_type
85
+ self.beam_size = beam_size
86
+
87
+ self.transforms = data_transforms(split_type, img_size)
88
+ self.tokenizer = BertTokenizer.from_pretrained(bert_dir, do_lower_case=True)
89
+ self.cuhk_peds = cuhk_peds
90
+ self.joint_train = joint_train
91
+ self.coco_train = coco_train
92
+ if joint_train:
93
+ self.annotation = json.load(open(joint_train_anno_root, 'r'))
94
+ self.cuhk_peds_root = cuhk_peds_root
95
+ self.synth_peds_root = synth_peds_root
96
+ self.mals_root = mals_root
97
+ self.luperson_root = luperson_root
98
+ self.coco_gt_file = cuhk_peds_gt_root
99
+ elif cuhk_peds:
100
+ self.annotation = json.load(open(cuhk_peds_anno_root, 'r'))
101
+ self.coco_gt_file = cuhk_peds_gt_root
102
+ self.coco_root = cuhk_peds_root
103
+ elif coco_train:
104
+ self.coco_root = coco_root
105
+ self.coco_gt_file = osp.join(anno_root, 'coco_gt', 'coco_karpathy_' + split_type + '_gt.json')
106
+ self.annotation = json.load(open(osp.join(anno_root, 'coco_karpathy_' + split_type + '.json'), 'r'))
107
+
108
+
109
+ def __len__(self):
110
+ return len(self.annotation)
111
+
112
+ def __getitem__(self, index):
113
+ sample = self.annotation[index]
114
+ if self.joint_train and self.split_type == 'train':
115
+ if sample['split'] == 'cuhk_peds':
116
+ image_path = osp.join(self.cuhk_peds_root, sample['image'])
117
+ elif sample['split'] == 'mals':
118
+ image_path = osp.join(self.mals_root, sample['image'])
119
+ elif sample['split'] == 'luperson':
120
+ image_path = osp.join(self.luperson_root, sample['image'])
121
+ else:
122
+ image_path = osp.join(self.synth_peds_root, sample['image'])
123
+ else:
124
+ image_path = osp.join(self.coco_root, sample['image'])
125
+ image = Image.open(image_path).convert('RGB')
126
+ image = self.transforms(image)
127
+ if self.split_type != 'train':
128
+ caption_id = np.zeros(self.max_words - 1, dtype=np.int32)
129
+ token_type_id = np.zeros(self.max_words - 1, dtype=np.int32)
130
+ caption_pad_mask = np.zeros(self.max_words - 1, dtype=np.int32)
131
+ if self.cuhk_peds:
132
+ img_id = sample['image'].split('.')[0]
133
+ else:
134
+ img_id = sample['image'].split('/')[-1].strip('.jpg').split('_')[-1]
135
+ coco_gt_file = self.coco_gt_file
136
+ beam_size = self.beam_size
137
+ return {'image': image, 'input_id': caption_id, 'image_id': int(img_id) if not self.cuhk_peds else img_id,
138
+ 'coco_gt_file': coco_gt_file, 'beam_size': beam_size,
139
+ 'token_type_id': token_type_id, 'padding_mask': caption_pad_mask}
140
+ caption = self.prompt + pre_caption(sample['caption'], self.max_words)
141
+ caption_encode = self.tokenizer.encode_plus(caption, max_length=self.max_words, pad_to_max_length=True,
142
+ return_attention_mask=True, return_token_type_ids=True,
143
+ truncation=True)
144
+ caption_id, caption_pad_mask, token_type_id = caption_encode['input_ids'], caption_encode['attention_mask'], caption_encode['token_type_ids']
145
+ caption_id = np.array(caption_id)
146
+ token_type_id = np.array(token_type_id)
147
+ caption_pad_mask = np.array(caption_pad_mask)
148
+ # caption_pad_mask = (1 - np.array(caption_pad_mask)).astype(bool)
149
+ caption = [caption]
150
+ output = {'image': image, 'input_id': caption_id, 'token_type_id': token_type_id, 'padding_mask': caption_pad_mask, 'label': caption_id}
151
+ return output
152
+
153
+ def __repr__(self):
154
+ return self.__class__.__name__ + \
155
+ f'rank: {self.rank} task: {self.task_name} mode:{"training" if self.split_type == "train" else "inference"} ' \
156
+ f'dataset_len:{len(self.annotation)} augmentation: {self.transforms}'
157
+
158
+
159
+ class CocoCaptiondemo(Dataset):
160
+ """
161
+ Implementation of the dataloader for coco_caption.
162
+ Mainly used in the model training and evaluation.
163
+ Params:
164
+ ginfo: group information for Multitask learning.
165
+ coco_root: root path of coco2014 dataset.
166
+ anno_root: annotation path of coco captions.
167
+ bert_dir: path of bert-base-uncased for loading tokenizer.
168
+ max_words: max length of input captions.
169
+ img_size: image size.
170
+ prompt: given prompt to add before captions.
171
+ """
172
+
173
+ def __init__(self, ginfo, max_words=30, img_size=384, beam_size=1, prompt='', split_type='train', demo_dir='/mnt/cache/tangshixiang/wyz_proj/demo_video_unihcpv2/folder0',
174
+ cuhk_peds=False, cuhk_peds_root=None, cuhk_peds_anno_root=None, cuhk_peds_gt_root=None,
175
+ joint_train=False, synth_peds_root=None, joint_train_anno_root=None, coco_train=False,
176
+ coco_root=None, anno_root=None, bert_dir='', mals_root=None, luperson_root=None):
177
+ self.task_name = ginfo.task_name
178
+ self.rank = dist.get_rank()
179
+ self.prompt = prompt
180
+
181
+ # plus one for bos token
182
+ self.max_words = max_words + 1
183
+ self.img_size = img_size
184
+ self.split_type = split_type
185
+ self.beam_size = beam_size
186
+
187
+ self.transforms = data_transforms(split_type, img_size)
188
+ self.tokenizer = BertTokenizer.from_pretrained(bert_dir, do_lower_case=True)
189
+ self.cuhk_peds = cuhk_peds
190
+ self.joint_train = joint_train
191
+ self.coco_train = coco_train
192
+ if joint_train:
193
+ self.annotation = json.load(open(joint_train_anno_root, 'r'))
194
+ self.cuhk_peds_root = cuhk_peds_root
195
+ self.synth_peds_root = synth_peds_root
196
+ self.mals_root = mals_root
197
+ self.luperson_root = luperson_root
198
+ self.coco_gt_file = cuhk_peds_gt_root
199
+ elif cuhk_peds:
200
+ self.annotation = json.load(open(cuhk_peds_anno_root, 'r'))
201
+ self.coco_gt_file = cuhk_peds_gt_root
202
+ self.coco_root = cuhk_peds_root
203
+ elif coco_train:
204
+ self.coco_root = coco_root
205
+ self.coco_gt_file = osp.join(anno_root, 'coco_gt', 'coco_karpathy_' + split_type + '_gt.json')
206
+ self.annotation = json.load(open(osp.join(anno_root, 'coco_karpathy_' + split_type + '.json'), 'r'))
207
+ self.demo_dir = demo_dir
208
+
209
+
210
+ def __len__(self):
211
+ return len(os.listdir(self.demo_dir))
212
+
213
+ def __getitem__(self, index):
214
+ # import pdb; pdb.set_trace()
215
+ sample = self.annotation[index]
216
+ if self.joint_train and self.split_type == 'train':
217
+ if sample['split'] == 'cuhk_peds':
218
+ image_path = osp.join(self.cuhk_peds_root, sample['image'])
219
+ elif sample['split'] == 'mals':
220
+ image_path = osp.join(self.mals_root, sample['image'])
221
+ elif sample['split'] == 'luperson':
222
+ image_path = osp.join(self.luperson_root, sample['image'])
223
+ else:
224
+ image_path = osp.join(self.synth_peds_root, sample['image'])
225
+ else:
226
+ image_path = osp.join(self.coco_root, sample['image'])
227
+ filename = os.path.join(self.demo_dir, f'frame_{index}.jpg')
228
+ image = Image.open(filename).convert('RGB')
229
+ image = self.transforms(image)
230
+ if self.split_type != 'train':
231
+ caption_id = np.zeros(self.max_words - 1, dtype=np.int32)
232
+ token_type_id = np.zeros(self.max_words - 1, dtype=np.int32)
233
+ caption_pad_mask = np.zeros(self.max_words - 1, dtype=np.int32)
234
+ if self.cuhk_peds:
235
+ img_id = sample['image'].split('.')[0]
236
+ else:
237
+ img_id = sample['image'].split('/')[-1].strip('.jpg').split('_')[-1]
238
+ coco_gt_file = self.coco_gt_file
239
+ beam_size = self.beam_size
240
+ return {'image': image, 'input_id': caption_id, 'image_id': filename,
241
+ 'coco_gt_file': coco_gt_file, 'beam_size': beam_size,
242
+ 'token_type_id': token_type_id, 'padding_mask': caption_pad_mask}
243
+ caption = self.prompt + pre_caption(sample['caption'], self.max_words)
244
+ caption_encode = self.tokenizer.encode_plus(caption, max_length=self.max_words, pad_to_max_length=True,
245
+ return_attention_mask=True, return_token_type_ids=True,
246
+ truncation=True)
247
+ caption_id, caption_pad_mask, token_type_id = caption_encode['input_ids'], caption_encode['attention_mask'], \
248
+ caption_encode['token_type_ids']
249
+ caption_id = np.array(caption_id)
250
+ token_type_id = np.array(token_type_id)
251
+ caption_pad_mask = np.array(caption_pad_mask)
252
+ # caption_pad_mask = (1 - np.array(caption_pad_mask)).astype(bool)
253
+ caption = [caption]
254
+ output = {'image': image, 'input_id': filename, 'token_type_id': token_type_id,
255
+ 'padding_mask': caption_pad_mask, 'label': caption_id}
256
+ return output
257
+
258
+ def __repr__(self):
259
+ return self.__class__.__name__ + \
260
+ f'rank: {self.rank} task: {self.task_name} mode:{"training" if self.split_type == "train" else "inference"} ' \
261
+ f'dataset_len:{len(self.annotation)} augmentation: {self.transforms}'
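For reference, a minimal standalone sketch of the caption encoding performed in `__getitem__` above; the tokenizer directory and example caption are placeholders, and the `encode_plus` arguments mirror the dataset code:

    # Hypothetical check of the caption -> (input_id, padding_mask, token_type_id) encoding.
    from transformers import BertTokenizer

    max_words = 30 + 1  # +1 for the BOS token, as in the dataset constructor
    tokenizer = BertTokenizer.from_pretrained('./bert-base-uncased', do_lower_case=True)
    enc = tokenizer.encode_plus('a person in a red jacket walking down the street',
                                max_length=max_words, pad_to_max_length=True,
                                return_attention_mask=True, return_token_type_ids=True,
                                truncation=True)
    # enc['input_ids'], enc['attention_mask'] and enc['token_type_ids'] are lists of length
    # max_words; the dataset wraps them in numpy arrays and returns them as
    # 'input_id', 'padding_mask' and 'token_type_id' (plus 'label' = the input ids).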
core/data/datasets/images/multi_posedataset.py ADDED
@@ -0,0 +1,413 @@
1
+ import copy
2
+ from abc import ABCMeta, abstractmethod
3
+ import numpy as np
4
+ from torch.utils.data import Dataset
5
+ from pathlib import Path
6
+
7
+ import os
8
+ import cv2
9
+ import random
10
+ import time
11
+ import os.path as osp
13
+ import warnings
14
+ from collections import OrderedDict, defaultdict
15
+ from core.data.transforms.pose_transforms import *
16
+ import json  # the standard json module is sufficient here (json_tricks is not required)
18
+ from xtcocotools.coco import COCO
19
+ from xtcocotools.cocoeval import COCOeval
20
+ import torch.distributed as dist
21
+
22
+
23
+ from core.utils import sync_print
24
+
25
+
26
+ class PetrelCOCO(COCO):
27
+ def __init__(self, annotation_file=None, test_index=None, ann_data=None):
28
+ """
29
+ Constructor of Microsoft COCO helper class for reading and visualizing annotations.
30
+ :param annotation_file (str): location of annotation file
31
+ :param test_index (list): optional keypoint indices to keep for evaluation.
+ :param ann_data (dict): optional pre-loaded annotation dict, used instead of reading annotation_file.
32
+ :return:
33
+ """
34
+ self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
35
+ self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
36
+ self.anno_file = [annotation_file]
37
+ self.test_index = test_index
38
+ if annotation_file is not None:
39
+ print('loading annotations into memory...')
40
+ tic = time.time()
41
+ # https://github.com/cocodataset/cocoapi/pull/453/
42
+ if ann_data is None:
43
+ with open(annotation_file, 'r') as f:
44
+ dataset = json.load(f)
45
+ else:
46
+ dataset = ann_data
47
+ assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
48
+ print('Done (t={:0.2f}s)'.format(time.time()- tic))
49
+ self.dataset = dataset
50
+ self.createIndex()
51
+ if 'annotations' in self.dataset:
52
+ for i in range(len(self.dataset['annotations'])):
53
+ if self.test_index is not None:
54
+ keypoints = np.array(self.dataset['annotations'][i]['keypoints']).reshape([-1, 3])
55
+ keypoints = keypoints[self.test_index, :]
56
+ self.dataset['annotations'][i]['keypoints'] = keypoints.reshape([-1]).tolist()
57
+ if 'iscrowd' not in self.dataset['annotations'][i]:
58
+ self.dataset['annotations'][i]['iscrowd'] = False
59
+
60
+
61
+ class MultiPoseDatasetDev(Dataset):
62
+ def __init__(self,
63
+ ginfo,
64
+ ann_file,
65
+ img_prefix,
66
+ data_cfg,
67
+ test_mode=False,
68
+ use_udp=False,
69
+ dataset_name='coco',
70
+ use_ceph=False,
71
+ simp_aug=False,
72
+ **kwargs):
73
+
74
+ assert dataset_name in ['coco', 'aic', 'posetrack', 'halpe', 'JRDB2022', 'h36m', 'mhp', 'penn_action', '3DPW', '3DHP', 'AIST'], "invalid dataset name input"
75
+ self.dataset_name = dataset_name
76
+ self.image_info = {}
77
+ self.ann_info = {}
78
+ self.initialized = False
79
+
80
+ self.use_ceph = True
81
+ self.annotations_path = ann_file
82
+ self.img_prefix = img_prefix
83
+ self.test_mode = test_mode
84
+ print('data_cfg0',data_cfg)
85
+ # data_cfg=demjson.decode(data_cfg)
86
+ # print('data_cfg',data_cfg)
87
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
88
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
89
+ self.ann_info['num_joints'] = data_cfg['num_joints']
90
+
91
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
92
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
93
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
94
+
95
+ self.db = []
96
+ self.task_name = ginfo.task_name
97
+
98
+ if test_mode:
99
+ pipeline = [
100
+ LoadImageFromFile(use_ceph=use_ceph),
101
+ TopDownAffine(use_udp=use_udp),
102
+ ToUNTensor(),
103
+ Collect(keys=['image'],
104
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'bbox_score', 'flip_pairs'])
105
+ ]
106
+ else:
107
+ if self.dataset_name in ['coco', 'aic'] or simp_aug:
108
+ pipeline = [
109
+ LoadImageFromFile(use_ceph=use_ceph),
110
+ TopDownRandomFlip(flip_prob=0.5),
111
+ TopDownHalfBodyTransform(num_joints_half_body=8, prob_half_body=0.3),
112
+ TopDownGetRandomScaleRotation(rot_factor=40, scale_factor=0.5),
113
+ TopDownAffine(use_udp=use_udp),
114
+ ToUNTensor(),
115
+ TopDownGenerateTarget(sigma=2, encoding='UDP' if use_udp else 'MSRA'),
116
+ Collect(keys=['image', 'label', 'target_weight'],
117
+ meta_keys=['image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale','rotation',
118
+ 'bbox_score', 'flip_pairs'])
119
+ ]
120
+ elif self.dataset_name in ['posetrack', 'halpe', 'penn_action', '3DPW', 'mhp']:
121
+ pipeline = [
122
+ LoadImageFromFile(),
123
+ TopDownGetBboxCenterScale(padding=1.25),
124
+ TopDownRandomShiftBboxCenter(shift_factor=0.16, prob=0.3),
125
+ TopDownRandomFlip(flip_prob=0.5),
126
+ TopDownHalfBodyTransform(num_joints_half_body=8, prob_half_body=0.3),
127
+ TopDownGetRandomScaleRotation(rot_factor=40, scale_factor=0.5),
128
+ TopDownAffine(use_udp=use_udp),
129
+ ToUNTensor(),
130
+ TopDownGenerateTarget(sigma=2, encoding='UDP' if use_udp else 'MSRA'),
131
+ Collect(keys=['image', 'label', 'target_weight'],
132
+ meta_keys=['image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale','rotation',
133
+ 'bbox_score', 'flip_pairs'])
134
+ ]
135
+ else:
136
+ pipeline = [
137
+ LoadImageFromFile(),
138
+ # TopDownGetBboxCenterScale(padding=1.25),
139
+ TopDownRandomShiftBboxCenter(shift_factor=0.16, prob=0.3),
140
+ TopDownRandomFlip(flip_prob=0.5),
141
+ TopDownHalfBodyTransform(num_joints_half_body=8, prob_half_body=0.3),
142
+ TopDownGetRandomScaleRotation(rot_factor=40, scale_factor=0.5),
143
+ TopDownAffine(use_udp=use_udp),
144
+ ToUNTensor(),
145
+ TopDownGenerateTarget(sigma=2, encoding='UDP' if use_udp else 'MSRA'),
146
+ Collect(keys=['image', 'label', 'target_weight'],
147
+ meta_keys=['image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 'rotation',
148
+ 'bbox_score', 'flip_pairs'])
149
+ ]
150
+
151
+
152
+ self.pipeline = ComposeX(pipeline)
153
+ # dict_keys(['image_file', 'center', 'scale', 'bbox', 'rotation', 'joints_3d', 'joints_3d_visible', 'dataset',
154
+ # 'bbox_score', 'bbox_id', 'ann_info', 'image', 'flipped',
155
+ # 'label', ****'target' as in mmlab****
156
+ # 'target_weight'])
157
+
158
+
159
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
160
+ self.bbox_file = data_cfg['bbox_file'] if data_cfg['bbox_file'].startswith('/mnt') else (Path(__file__).parent / 'resources' / data_cfg['bbox_file']).resolve()
161
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
162
+ if 'image_thr' in data_cfg:
163
+ warnings.warn(
164
+ 'image_thr is deprecated, '
165
+ 'please use det_bbox_thr instead', DeprecationWarning)
166
+ self.det_bbox_thr = data_cfg['image_thr']
167
+
168
+
169
+ self.ann_info['flip_pairs'] = data_cfg['flip_pairs']
170
+
171
+ self.ann_info['upper_body_ids'] = data_cfg['upper_body_ids']
172
+ self.ann_info['lower_body_ids'] = data_cfg['lower_body_ids']
173
+
174
+ self.ann_info['use_different_joint_weights'] = False
175
+ self.ann_info['joint_weights'] = np.array(
176
+ data_cfg['joint_weights'],
177
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
178
+
179
+ # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/'
180
+ # 'pycocotools/cocoeval.py#L523'
181
+
182
+ self.coco = PetrelCOCO(ann_file)
183
+
184
+ cats = [
185
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
186
+ ]
187
+ self.classes = ['__background__'] + cats
188
+ self.num_classes = len(self.classes)
189
+ self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
190
+ self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
191
+ self._coco_ind_to_class_ind = dict(
192
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
193
+ for cls in self.classes[1:])
194
+ self.img_ids = self.coco.getImgIds()
195
+ self.num_images = len(self.img_ids)
196
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
197
+
198
+
199
+ self.db = self._get_db()
200
+ print(f'=> dataset: {self.dataset_name} num_images: {self.num_images}')
201
+ print(f'=> dataset: {self.dataset_name} load {len(self.db)} samples')
202
+
203
+ @staticmethod
204
+ def _get_mapping_id_name(imgs):
205
+ """
206
+ Args:
207
+ imgs (dict): dict of image info.
208
+
209
+ Returns:
210
+ tuple: Image name & id mapping dicts.
211
+
212
+ - id2name (dict): Mapping image id to name.
213
+ - name2id (dict): Mapping image name to id.
214
+ """
215
+ id2name = {}
216
+ name2id = {}
217
+ for image_id, image in imgs.items():
218
+ file_name = image['file_name']
219
+ id2name[image_id] = file_name
220
+ name2id[file_name] = image_id
221
+
222
+ return id2name, name2id
223
+
224
+ def _get_db(self):
225
+ """Load dataset."""
226
+ if (not self.test_mode) or self.use_gt_bbox:
227
+ # use ground truth bbox
228
+ gt_db = self._load_coco_keypoint_annotations()
229
+ else:
230
+ # use bbox from detection
231
+ gt_db = self._load_coco_person_detection_results()
232
+ return gt_db
233
+
234
+ def _load_coco_keypoint_annotations(self):
235
+ """Ground truth bbox and keypoints."""
236
+ gt_db = []
237
+ for img_id in self.img_ids:
238
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
239
+ return gt_db
240
+
241
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
242
+ """load annotation from COCOAPI.
243
+
244
+ Note:
245
+ bbox:[x1, y1, w, h]
246
+ Args:
247
+ img_id: coco image id
248
+ Returns:
249
+ dict: db entry
250
+ """
251
+ img_ann = self.coco.loadImgs(img_id)[0]
252
+ width = img_ann['width']
253
+ height = img_ann['height']
254
+ num_joints = self.ann_info['num_joints']
255
+
256
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
257
+ objs = self.coco.loadAnns(ann_ids)
258
+
259
+ # sanitize bboxes
260
+ valid_objs = []
261
+ for obj in objs:
262
+ if 'bbox' not in obj:
263
+ continue
264
+ x, y, w, h = obj['bbox']
265
+ x1 = max(0, x)
266
+ y1 = max(0, y)
267
+ x2 = min(width - 1, x1 + max(0, w - 1))
268
+ y2 = min(height - 1, y1 + max(0, h - 1))
269
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
270
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
271
+ valid_objs.append(obj)
272
+ objs = valid_objs
273
+
274
+ bbox_id = 0
275
+ rec = []
276
+ for obj in objs:
277
+ if 'keypoints' not in obj:
278
+ continue
279
+ if max(obj['keypoints']) == 0:
280
+ continue
281
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
282
+ continue
283
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
284
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
285
+
286
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
287
+
288
+ if self.dataset_name == 'posetrack':
289
+ keypoints = np.delete(keypoints, [3, 4], axis=0) # keypoint idx == 3 and 4 not annot
290
+ elif self.dataset_name == 'halpe':
291
+ keypoints = keypoints[:17,:] # halpe has only 17 valid kp
292
+
293
+ joints_3d[:, :2] = keypoints[:, :2]
294
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
295
+
296
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
297
+
298
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
299
+ rec.append({
300
+ 'image_file': image_file,
301
+ 'center': center,
302
+ 'scale': scale,
303
+ 'bbox': obj['clean_bbox'][:4],
304
+ 'rotation': 0,
305
+ 'joints_3d': joints_3d,
306
+ 'joints_3d_visible': joints_3d_visible,
307
+ 'dataset': self.dataset_name,
308
+ 'bbox_score': 1,
309
+ 'bbox_id': bbox_id
310
+ })
311
+ bbox_id = bbox_id + 1
312
+
313
+ return rec
314
+
315
+ def _xywh2cs(self, x, y, w, h):
316
+ """This encodes bbox(x,y,w,w) into (center, scale)
317
+
318
+ Args:
319
+ x, y, w, h
320
+
321
+ Returns:
322
+ tuple: A tuple containing center and scale.
323
+
324
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
325
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
326
+ """
327
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
328
+ 'image_size'][1]
329
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
330
+
331
+ if (not self.test_mode) and np.random.rand() < 0.3:
332
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
333
+
334
+ if w > aspect_ratio * h:
335
+ h = w * 1.0 / aspect_ratio
336
+ elif w < aspect_ratio * h:
337
+ w = h * aspect_ratio
338
+
339
+ # pixel std is 200.0
340
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
341
+ # padding to include proper amount of context
342
+ scale = scale * 1.25
343
+
344
+ return center, scale
345
+
346
+ def _load_coco_person_detection_results(self):
347
+ """Load coco person detection results."""
348
+ num_joints = self.ann_info['num_joints']
349
+ all_boxes = None
350
+ with open(self.bbox_file, 'r') as f:
351
+ all_boxes = json.load(f)
352
+
353
+ if not all_boxes:
354
+ raise ValueError('=> Load %s fail!' % self.bbox_file)
355
+
356
+ print(f'=> Total boxes: {len(all_boxes)}')
357
+
358
+ kpt_db = []
359
+ bbox_id = 0
360
+ for det_res in all_boxes:
361
+ if det_res['category_id'] != 1:
362
+ continue
363
+
364
+ image_file = os.path.join(self.img_prefix,
365
+ self.id2name[det_res['image_id']])
366
+ box = det_res['bbox']
367
+ score = det_res['score']
368
+
369
+ if score < self.det_bbox_thr:
370
+ continue
371
+
372
+ center, scale = self._xywh2cs(*box[:4])
373
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
374
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
375
+ kpt_db.append({
376
+ 'image_file': image_file,
377
+ 'center': center,
378
+ 'scale': scale,
379
+ 'rotation': 0,
380
+ 'bbox': box[:4],
381
+ 'bbox_score': score,
382
+ 'dataset': self.dataset_name,
383
+ 'joints_3d': joints_3d,
384
+ 'joints_3d_visible': joints_3d_visible,
385
+ 'bbox_id': bbox_id
386
+ })
387
+ bbox_id = bbox_id + 1
388
+ print(f'=> Total boxes after filter '
389
+ f'low score@{self.det_bbox_thr}: {bbox_id}')
390
+ return kpt_db
391
+
392
+ def __len__(self):
393
+ """Get the size of the dataset."""
394
+ return len(self.db)
395
+
396
+ def __getitem__(self, idx):
397
+ """Get the sample given index."""
398
+ results = copy.deepcopy(self.db[idx])
399
+ results['ann_info'] = self.ann_info
400
+ out = self.pipeline(results)
401
+
402
+ C = self.ann_info['num_joints']
403
+
404
+ if 'label' in out:
405
+ out['dense_labeling'] = np.resize(out['label'],
406
+ (C, self.ann_info['image_size'][0], self.ann_info['image_size'][1]))
407
+ else:
408
+ out['dense_labeling'] = np.zeros((C, self.ann_info['image_size'][0], self.ann_info['image_size'][1]))
409
+
410
+ # del out['ann_info']
411
+ return out # dict_keys(['image_file', 'center', 'scale', 'bbox', 'rotation', 'joints_3d', 'joints_3d_visible',
412
+ # 'dataset', 'bbox_score', 'bbox_id', 'ann_info', 'image', 'flipped', 'label',
413
+ # 'target_weight'])
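A quick worked example of the `_xywh2cs` conversion defined above, assuming `image_size = [192, 256]` (a common top-down input size) and `test_mode=True` so the random center shift is skipped; the bbox values are illustrative:

    # Mirrors MultiPoseDatasetDev._xywh2cs for an illustrative bbox.
    x, y, w, h = 10.0, 20.0, 100.0, 200.0
    aspect_ratio = 192 / 256                      # 0.75
    center = [x + w * 0.5, y + h * 0.5]           # [60.0, 120.0]
    # w (100) < aspect_ratio * h (150), so the width is padded to match the aspect ratio
    w = h * aspect_ratio                          # 150.0
    scale = [w / 200.0 * 1.25, h / 200.0 * 1.25]  # [0.9375, 1.25] (pixel std 200, 1.25x context)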
core/data/datasets/images/parsing_dataset.py ADDED
@@ -0,0 +1,1084 @@
1
+ import os
2
+ import os.path as osp
3
+ import cv2
4
+ import torch
5
+ import io
6
+ import numpy as np
7
+ import itertools
8
+ from typing import Any, Dict, List, Tuple, Union
9
+ from torch.utils import data
10
+ from torch.nn import functional as F
11
+ from PIL import Image
12
+ from core.utils import cv2_loader, pil_loader
13
+ from core.data.datasets.images.seg_dataset_dev import Instances, BitMasks
14
+ import random
+ import hashlib  # used by get_unk_mask_indices to seed deterministically from the image
15
+ import torch.distributed as dist
16
+
17
+ import core.data.transforms.parsing_transforms as T
18
+ from core.data.transforms.pose_transforms import DataContainer
19
+
20
+ try:
21
+ from petrel_client.client import Client as Client
22
+
23
+ s3client = Client(boto=True,
24
+ enable_multi_cluster=True,
25
+ enable_mc=True)
26
+ except:
27
+ print("ceph can not be used")
28
+
29
+ palette_dict = {
30
+ 'human3m6_parsing': np.array(
31
+ [[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51], [255, 85, 0], [0, 0, 85], [0, 119, 221],
32
+ [85, 85, 0], [0, 85, 85],
33
+ [85, 51, 0], [52, 86, 128], [0, 128, 0], [0, 0, 255], [0, 255, 0],
34
+ [51, 170, 221], [0, 255, 255], [255, 170, 85], [85, 255, 170], [170, 85, 52],
35
+ [170, 255, 85], [255, 255, 0], [255, 170, 0], [255, 0, 170], [170, 0, 255]]),
36
+ 'LIP_parsing': np.array([[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51],
37
+ [255, 85, 0], [0, 0, 85], [0, 119, 221], [85, 85,
38
+ 0], [0, 85, 85],
39
+ [85, 51, 0], [52, 86, 128], [0, 128, 0], [0, 0, 255],
40
+ [51, 170, 221], [0, 255, 255], [85, 255, 170], [170, 255, 85],
41
+ [255, 255, 0], [255, 170, 0]]),
42
+ 'CIHP_parsing': np.array([[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51],
43
+ [255, 85, 0], [0, 0, 85], [0, 119, 221], [85, 85,
44
+ 0], [0, 85, 85],
45
+ [85, 51, 0], [52, 86, 128], [0, 128, 0], [0, 0, 255],
46
+ [51, 170, 221], [0, 255, 255], [85, 255, 170], [170, 255, 85],
47
+ [255, 255, 0], [255, 170, 0]]),
48
+ 'ATR_parsing': np.array([[0, 0, 0], [128, 0, 0], [255, 0, 0], [0, 85, 0], [170, 0, 51],
49
+ [255, 85, 0], [0, 0, 85], [0, 119, 221], [85, 85,
50
+ 0], [0, 85, 85],
51
+ [85, 51, 0], [52, 86, 128], [0, 128, 0], [0, 0, 255],
52
+ [51, 170, 221], [0, 255, 255], [85, 255, 170], [170, 255, 85]]),
53
+
54
+ }
55
+
56
+ def get_unk_mask_indices(image,num_labels,known_labels,epoch=1,testing=False,):
57
+ if testing:
58
+ # for consistency across epochs and experiments, seed using hashed image array
59
+ random.seed(hashlib.sha1(np.array(image)).hexdigest())
60
+ unk_mask_indices = random.sample(range(num_labels), (num_labels-int(known_labels)))
61
+ else:
62
+ # sample random number of known labels during training
63
+ if known_labels>0:
64
+ random.seed()
65
+ num_known = random.randint(0,int(num_labels*0.75))
66
+ else:
67
+ num_known = 0
68
+
69
+ unk_mask_indices = random.sample(range(num_labels), (num_labels-num_known))
70
+
71
+ return unk_mask_indices
72
+
73
+ class Human3M6ParsingDataset(data.Dataset):
74
+ task_name = 'human3m6_parsing'
75
+ left_right_pairs = np.array([[1, 6],
76
+ [2, 7],
77
+ [3, 8],
78
+ [17, 25],
79
+ [18, 26],
80
+ [19, 27],
81
+ [33, 38],
82
+ [34, 39],
83
+ [49, 56],
84
+ [50, 58]])
85
+
86
+ label_mapper = np.arange(60)
87
+
88
+ evaluate_size = (1000, 1000)
89
+
90
+ def __init__(self,
91
+ ginfo,
92
+ data_path,
93
+ dataset='train',
94
+ data_use_ratio=1,
95
+ is_train=True,
96
+ cfg=None,
97
+ **kwargs):
98
+ """human3.6m dataset for human parsing
99
+ Args:
100
+ root_dir ([str]): where dataset
101
+ dataset: train / val
102
+ cfg: yaml format config
103
+
104
+ # 0 : background
105
+ # 1 : right hip
106
+ # 2 : right knee
107
+ # 3 : right foot
108
+ # 6 : left hip
109
+ # 7 : left knee
110
+ # 8 : left foot
111
+ # 17 : left shoulder
112
+ # 18 : left elbow
113
+ # 19 : left hand
114
+ # 25 : right shoulder
115
+ # 26 : right elbow
116
+ # 27 : right hand
117
+ # 32 : crotch
118
+ # 33 : right thigh
119
+ # 34 : right calf
120
+ # 38 : left thigh
121
+ # 39 : left calf
122
+ # 43 : lower spine
123
+ # 44 : upper spine
124
+ # 46 : head
125
+ # 49 : left arm
126
+ # 50 : left forearm
127
+ # 56 : right arm
128
+ # 58 : right forearm
129
+
130
+ """
131
+ # self.task_name = 'human3m6_parsing'
132
+ self.cfg = cfg
133
+ self.dataset = dataset
134
+ self.is_train = is_train
135
+ self.data_use_ratio = data_use_ratio
136
+ self.pseudo_labels = self.cfg.get('Pseudo_labels', False)
137
+ self.stride_level = self.cfg.get('stride_level', 1)
138
+ # self.palette = palette_dict[self.task_name]
139
+ self.pseudo_labels_palette = palette_dict[self.cfg.get('Pseudo_labels_palette','human3m6_parsing')]
140
+ self.ignore2endclass = self.cfg.get('ignore2endclass', False)
141
+
142
+ self.img_list, self.label_list, self.name_list = self._list_dirs(data_path)
143
+
144
+ index = np.arange(0, len(self.img_list))
145
+ random.shuffle(index)
146
+ self.img_list = np.array(self.img_list)
147
+ self.label_list = np.array(self.label_list)
148
+ self.name_list = np.array(self.name_list)
149
+
150
+ self.img_list = self.img_list[index].tolist()
151
+ self.label_list = self.label_list[index].tolist()
152
+ self.name_list = self.name_list[index].tolist()
153
+
154
+ self.images = self.img_list
155
+ self.labels = self.label_list
156
+ self.ignore_label = cfg.ignore_value
157
+ self.num = len(self.images)
158
+ self.num_classes = len(self.cfg.label_list) # - 1
159
+ assert self.num_classes == self.cfg.num_classes, f"num of class mismatch, len(label_list)={self.num_classes}, num_classes:{self.cfg.num_classes}"
160
+
161
+ self.rank = dist.get_rank()
162
+ self.world_size = dist.get_world_size()
163
+
164
+ self.original_label = np.array(self.cfg.label_list)
165
+
166
+ for i, l in enumerate(self.original_label):
167
+ self.label_mapper[l] = i
168
+ self.mapped_left_right_pairs = self.label_mapper[self.left_right_pairs] if self.left_right_pairs is not None else None
169
+
170
+ if self.is_train:
171
+ augs = T.compose([T.hflip(cfg.get("is_flip", False), self.mapped_left_right_pairs),
172
+ T.resize_image(cfg.crop_size),
173
+ T.multi_scale(cfg.get("is_multi_scale", False), scale_factor=cfg.get("scale_factor", 11),
174
+ center_crop_test=cfg.get("center_crop_test", False),
175
+ base_size=cfg.base_size,
176
+ crop_size=cfg.crop_size,
177
+ ignore_label=cfg.get("ignore_value", 255)),
178
+ T.rotate(cfg.get("is_rotate", False), degree=cfg.get("degree", 30),
179
+ p=cfg.get("possibility", 0.6), pad_val=cfg.get("pad_val", 0),
180
+ seg_pad_val=cfg.get("ignore_value", 255)),
181
+ T.PhotoMetricDistortion(cfg.get('is_photometricdistortion', False),
182
+ brightness_delta=cfg.get('brightness', 32),
183
+ contrast_range=cfg.get('contrast_range', [0.5, 1.5]),
184
+ saturation_range=cfg.get("saturation_range", [0.5, 1.5]),
185
+ hue_delta=cfg.get('hue', 18)
186
+ ),
187
+ T.transpose()])
188
+ else:
189
+ augs = T.compose([T.resize_image_eval(cfg.eval_crop_size),
190
+ T.transpose()])
191
+ self.augs = augs
192
+
193
+ self.initialized = False
194
+ self.use_ceph = True
195
+
196
+ def __len__(self):
197
+ return len(self.img_list)
198
+
199
+ def _ignore_to_endclass(self, label):
200
+ label[label==self.ignore_label] = self.num_classes
201
+ return label
202
+
203
+ def _read_one(self, index=None):
204
+ if index == None:
205
+ index = np.random.randint(self.num)
206
+
207
+ filename = self.img_list[index]
208
+ try:
209
+ img = Image.open(filename).convert('RGB')
210
+ img = np.array(img)[:,:,::-1]
211
+
212
+ except:
213
+ outputName = "failed_to_read_in_train.txt"
214
+ with open(outputName, "a") as g:
215
+ g.write("%s\n" % (filename))
216
+ print('Read image[{}] failed ({})'.format(index, filename))
217
+ ## if fail then recursive call _read_one without idx
218
+ return self._read_one()
219
+
220
+ gt_label = self.label_list[index]
221
+
222
+ try:
223
+ label = np.array(Image.open(gt_label))
224
+ except:
225
+ outputName = "failed_to_read_in_train_labels.txt"
226
+ with open(outputName, "a") as g:
227
+ g.write("%s\n" % (filename))
228
+ print('Read image[{}] failed ({})'.format(index, gt_label))
229
+ ## if fail then recursive call _read_one without idx
230
+ return self._read_one()
231
+
232
+ return img, label
233
+
234
+ def __getitem__(self, index):
235
+
236
+ dataset_dict = {}
237
+ dataset_dict["filename"] = self.name_list[index]
238
+
239
+ image, parsing_seg_gt = self._read_one(index)
240
+
241
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
242
+
243
+ self._record_image_size(dataset_dict, image)
244
+
245
+ if self.pseudo_labels:
246
+ image, parsing_seg_gt = self.augs(image, parsing_seg_gt)
247
+ image = torch.as_tensor(np.ascontiguousarray(image))
248
+ parsing_seg_gt = torch.as_tensor(np.ascontiguousarray(parsing_seg_gt))
249
+ dataset_dict["image"] = image
250
+ dataset_dict['PL_gt'] = parsing_seg_gt
251
+ return dataset_dict
252
+
253
+ parsing_seg_gt = self._encode_label(parsing_seg_gt) # - 1 no need to filter background in human parsing
254
+
255
+ size = parsing_seg_gt.size
256
+
257
+ if not self.is_train:
258
+ if len(self.evaluate_size) == 2:
259
+ dataset_dict["gt"] = np.copy(
260
+ cv2.resize(parsing_seg_gt, self.evaluate_size, interpolation=cv2.INTER_LINEAR_EXACT).astype(np.int_))
261
+ else:
262
+ # use DataContainer type to avoid being batched as tensors
263
+ dataset_dict["gt"] = DataContainer(np.copy(parsing_seg_gt.astype(np.int_)))
264
+
265
+ parsing_seg_gt = parsing_seg_gt.astype("double")
266
+ assert len(parsing_seg_gt), "parsing needs gt to train"
267
+ image, parsing_seg_gt = self.augs(image, parsing_seg_gt)
268
+ if self.stride_level>1:
269
+ temp_h, temp_w = parsing_seg_gt.shape
270
+ parsing_seg_gt = np.asarray(Image.fromarray(parsing_seg_gt).convert('P').resize((int(temp_h/self.stride_level), int(temp_w/self.stride_level))))
271
+ if self.ignore2endclass:
272
+ parsing_seg_gt = self._ignore_to_endclass(parsing_seg_gt)
273
+ image = torch.as_tensor(np.ascontiguousarray(image))
274
+ parsing_seg_gt = torch.as_tensor(parsing_seg_gt.astype("long"))
275
+
276
+ image_shape = (image.shape[-2], image.shape[-1]) # h, w
277
+
278
+ dataset_dict["image"] = image
279
+ if not self.is_train:
280
+ if self.cfg.get('label_mask', False):
281
+ m = torch.ones(self.num_classes,dtype=torch.int64)*-1 # mask all labels
282
+ dataset_dict['mask'] = m
283
+ return dataset_dict
284
+
285
+ dataset_dict["label"] = parsing_seg_gt.long() # not used in test
286
+
287
+ # Prepare per-category binary masks
288
+ parsing_seg_gt = parsing_seg_gt.numpy()
289
+ instances = Instances(image_shape)
290
+ classes = np.unique(parsing_seg_gt)
291
+ # remove ignored region
292
+ if self.cfg.get('add_zero_mask',False):
293
+ classes = np.array(list(range(self.num_classes)))
294
+ classes = classes[classes != self.ignore_label]
295
+ instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
296
+
297
+ if self.cfg.get('label_mask', False):
298
+ m = np.zeros(self.num_classes)
299
+ m[classes] = 1
300
+ mask = torch.tensor(m, dtype=torch.int64).clone()
301
+ unk_mask_indices = get_unk_mask_indices(image, self.num_classes, known_labels=100,) # set known_labels>1 to use label masking training
302
+ mask.scatter_(0, torch.Tensor(unk_mask_indices).long(), -1)
303
+ dataset_dict['mask'] = mask
304
+
305
+ masks = []
306
+ for class_id in classes:
307
+ masks.append(parsing_seg_gt == class_id)
308
+
309
+ if len(masks) == 0:
310
+ # Some image does not have annotation (all ignored)
311
+ instances.gt_masks = torch.zeros((0, parsing_seg_gt.shape[-2], parsing_seg_gt.shape[-1]))
312
+ else:
313
+ masks = BitMasks(
314
+ torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
315
+ )
316
+ instances.gt_masks = masks.tensor
317
+
318
+ dataset_dict["instances"] = instances # not used in test
319
+
320
+ return dataset_dict # {'image': img_mask, 'label': target_mask, 'instances': xxx, 'filename': img_name}
321
+
322
+ @staticmethod
323
+ def _record_image_size(dataset_dict, image):
324
+ """
325
+ Record the original image width/height in the dataset dict if they are not already set.
326
+ """
327
+ # To ensure bbox always remap to original image size
328
+ if "width" not in dataset_dict:
329
+ dataset_dict["width"] = image.shape[1]
330
+ if "height" not in dataset_dict:
331
+ dataset_dict["height"] = image.shape[0]
332
+
333
+ def _list_dirs(self, data_path):
334
+ img_list = list()
335
+ label_list = list()
336
+ name_list = list()
337
+
338
+ if self.dataset == 'train':
339
+ train_type = 'train'
340
+ elif self.dataset == 'val':
341
+ train_type = 'eval'
342
+ list_txt = osp.join(data_path, f'flist_2hz_{train_type}.txt')
343
+
344
+ # with open(list_txt, 'r') as f:
345
+ # data = f.readlines()
346
+ # data = [d.strip() for d in data]
347
+ list_lines = s3client.Get(list_txt)
348
+ if not list_lines:
349
+ print('File not exist', list_txt)
350
+ raise IOError('File not exist', list_txt)
353
+ list_lines = list_lines.decode('ascii')
354
+ data = list_lines.split('\n')
355
+ data = [d for d in data if len(d)]
356
+
357
+
358
+ if self.data_use_ratio != 1:
359
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
360
+
361
+ for d in data:
362
+ img_path = osp.join(data_path, d)
363
+ image_name = '/'.join(d.split('/')[2:])
364
+ label_path = img_path.replace('rgb', 'seg', 1)
365
+
366
+ img_list.append(img_path)
367
+ label_list.append(label_path)
368
+ name_list.append(image_name)
369
+
370
+ return img_list, label_list, name_list
371
+
372
+ def _encode_label(self, labelmap):
373
+ shape = labelmap.shape
374
+ encoded_labelmap = np.zeros(shape=(shape[0], shape[1]), dtype=np.uint8)
375
+ for i, class_id in enumerate(self.cfg.label_list):
376
+ encoded_labelmap[labelmap == class_id] = i
377
+
378
+ return encoded_labelmap
379
+
380
+ def __repr__(self):
381
+ return self.__class__.__name__ + \
382
+ f'rank: {self.rank} task: {self.task_name} mode:{"training" if self.is_train else "inference"} ' \
383
+ f'dataset_len:{len(self.img_list)} id_num:{self.cfg["num_classes"]} augmentation: {self.augs}'
384
+
385
+ class LIPParsingDataset(Human3M6ParsingDataset):
386
+ """
387
+ 0:'background',
388
+ 1:'hat',
389
+ 2:'hair',
390
+ 3:'glove',
391
+ 4:'sunglasses',
392
+ 5:'upperclothes',
393
+ 6:'dress',
394
+ 7:'coat',
395
+ 8:'socks',
396
+ 9:'pants',
397
+ 10:'jumpsuits',
398
+ 11:'scarf',
399
+ 12:'skirt',
400
+ 13:'face',
401
+ 14:'leftArm',
402
+ 15:'rightArm',
403
+ 16:'leftLeg',
404
+ 17:'rightLeg',
405
+ 18:'leftShoe',
406
+ 19:'rightShoe'
407
+ """
408
+ task_name = 'LIP_parsing'
409
+
410
+ left_right_pairs = np.array(
411
+ [[14, 15], [16, 17], [18, 19]]
412
+ )
413
+
414
+ label_mapper = np.arange(60)
415
+
416
+ evaluate_size = ()
417
+
418
+ def __init__(self,
419
+ ginfo,
420
+ data_path,
421
+ dataset='train',
422
+ data_use_ratio=1,
423
+ is_train=True,
424
+ cfg=None,
425
+ **kwargs):
426
+ super(LIPParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,
427
+ data_use_ratio=data_use_ratio,
428
+ dataset=dataset, is_train=is_train,
429
+ cfg=cfg, **kwargs)
430
+
431
+ def _list_dirs(self, data_path):
432
+ img_list = list()
433
+ label_list = list()
434
+ name_list = list()
435
+
436
+ if self.dataset == 'train':
437
+ train_type = 'train'
438
+ elif self.dataset == 'val':
439
+ train_type = 'val'
440
+ """
441
+ - LIP
442
+ -data
443
+ -train_id.txt
444
+ -train_images
445
+ -1000_1234574.jpg
446
+ -val_images
447
+ -val_id.txt
448
+ -Trainval_parsing_annotations
449
+ -train_segmentations
450
+ -1000_1234574.png
451
+ """
452
+ list_txt = osp.join(data_path, 'data', f'{train_type}_id.txt')
453
+
454
+ with open(list_txt, 'r') as f:
455
+ data = f.readlines()
456
+ data = [d.strip() for d in data]
457
+
458
+
459
+ if self.data_use_ratio != 1:
460
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
461
+
462
+ postfix_img = '.jpg'
463
+ postfix_ann = '.png'
464
+ for d in data:
465
+ img_path = osp.join(data_path, f'data/{train_type}_images', d + postfix_img)
466
+ image_name = d
467
+ label_path = osp.join(data_path, f'TrainVal_parsing_annotations/{train_type}_segmentations',
468
+ d + postfix_ann)
469
+
470
+ img_list.append(img_path)
471
+ label_list.append(label_path)
472
+ name_list.append(image_name)
473
+
474
+ return img_list, label_list, name_list
475
+
476
+ class CIHPParsingDataset(Human3M6ParsingDataset):
477
+ """
478
+ 0:'background',
479
+ 1:'hat',
480
+ 2:'hair',
481
+ 3:'glove',
482
+ 4:'sunglasses',
483
+ 5:'upperclothes',
484
+ 6:'dress',
485
+ 7:'coat',
486
+ 8:'socks',
487
+ 9:'pants',
488
+ 10:'torsoSkin',
489
+ 11:'scarf',
490
+ 12:'skirt',
491
+ 13:'face',
492
+ 14:'leftArm',
493
+ 15:'rightArm',
494
+ 16:'leftLeg',
495
+ 17:'rightLeg',
496
+ 18:'leftShoe',
497
+ 19:'rightShoe'
498
+ """
499
+ task_name = 'CIHP_parsing'
500
+
501
+ left_right_pairs = np.array(
502
+ [[14, 15], [16, 17], [18, 19]]
503
+ )
504
+
505
+ label_mapper = np.arange(60)
506
+
507
+ evaluate_size = ()
508
+
509
+ def __init__(self,
510
+ ginfo,
511
+ data_path,
512
+ dataset='train',
513
+ data_use_ratio=1,
514
+ is_train=True,
515
+ cfg=None,
516
+ **kwargs):
517
+ super(CIHPParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,data_use_ratio=data_use_ratio,
518
+ dataset=dataset, is_train=is_train,
519
+ cfg=cfg, **kwargs)
520
+
521
+ def _list_dirs(self, data_path):
522
+ img_list = list()
523
+ label_list = list()
524
+ name_list = list()
525
+
526
+ if self.dataset == 'train':
527
+ train_type = 'train'
528
+ elif self.dataset == 'val':
529
+ train_type = 'val'
530
+ """
531
+ - CHIP
532
+ -instance-level_human_parsing
533
+ -Training
534
+ -Images
535
+ -0008522.jpg
536
+ -Category_ids
537
+ -0008522.png
538
+ -train_id.txt
539
+ -Validation
540
+ -val_id.txt
541
+ """
542
+ Infix = 'Training' if train_type == 'train' else 'Validation'
543
+ list_txt = osp.join(data_path, 'instance-level_human_parsing', Infix, f'{train_type}_id.txt')
544
+
545
+ with open(list_txt, 'r') as f:
546
+ data = f.readlines()
547
+ data = [d.strip() for d in data]
548
+
549
+ if self.data_use_ratio != 1:
550
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
551
+
552
+ postfix_img = '.jpg'
553
+ postfix_ann = '.png'
554
+ for d in data:
555
+ img_path = osp.join(data_path, 'instance-level_human_parsing', Infix, f'Images', d + postfix_img)
556
+ image_name = d
557
+ label_path = osp.join(data_path, 'instance-level_human_parsing', Infix, 'Category_ids', d + postfix_ann)
558
+
559
+ img_list.append(img_path)
560
+ label_list.append(label_path)
561
+ name_list.append(image_name)
562
+
563
+ return img_list, label_list, name_list
564
+
565
+
566
+ class ATRParsingDataset(Human3M6ParsingDataset):
567
+ """
568
+ 0:'background', #
569
+ 1:'hat', #
570
+ 2:'hair',#
571
+ 3:'sunglasses',#
572
+ 4:'upperclothes',#
573
+ 5:'skirt',
574
+ 6:'pants',#
575
+ 7:'dress',#
576
+ 8:'belt',
577
+ 9:'leftshoe',#
578
+ 10:'rightshoe',#
579
+ 11:'face',#
580
+ 12:'leftleg',#
581
+ 13:'rightleg',#
582
+ 14:'leftarm',#
583
+ 15:'rightarm',#
584
+ 16:'bag',#
585
+ 17:'scarf',#
586
+ """
587
+ task_name = 'ATR_parsing'
588
+
589
+ left_right_pairs = np.array(
590
+ [[9,10], [12,13], [14,15]]
591
+ )
592
+
593
+ label_mapper = np.arange(60)
594
+
595
+ evaluate_size = ()
596
+
597
+ def __init__(self,
598
+ ginfo,
599
+ data_path,
600
+ dataset='train',
601
+ data_use_ratio=1,
602
+ is_train=True,
603
+ cfg=None,
604
+ **kwargs):
605
+ super(ATRParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,
606
+ data_use_ratio=data_use_ratio,
607
+ dataset=dataset, is_train=is_train,
608
+ cfg=cfg, **kwargs)
609
+
610
+ def _list_dirs(self, data_path):
611
+ img_list = list()
612
+ label_list = list()
613
+ name_list = list()
614
+
615
+ if self.dataset == 'train':
616
+ train_type = 'train'
617
+ elif self.dataset == 'val':
618
+ train_type = 'val'
619
+ """
620
+ - ATR
621
+ -humanparsing
622
+ -JPEGImages
623
+ -SegmentationClassAug
624
+ -train_id.txt
625
+ -val_id.txt
626
+ """
627
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
628
+ with open(list_txt, 'r') as f:
629
+ data = f.readlines()
630
+ data = [d.strip() for d in data]
631
+
632
+ if self.data_use_ratio != 1:
633
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
634
+
635
+ postfix_img = '.jpg'
636
+ postfix_ann = '.png'
637
+ for d in data:
638
+ img_path = osp.join(data_path, f'humanparsing/JPEGImages', d + postfix_img)
639
+ image_name = d
640
+ label_path = osp.join(data_path, f'humanparsing/SegmentationClassAug',
641
+ d + postfix_ann)
642
+
643
+ img_list.append(img_path)
644
+ label_list.append(label_path)
645
+ name_list.append(image_name)
646
+
647
+ return img_list, label_list, name_list
648
+
649
+
650
+ class DeepFashionParsingDataset(Human3M6ParsingDataset):
651
+ """
652
+ 0:'background', #
653
+ 1:'hat', #
654
+ 2:'hair',#
655
+ 3:'sunglasses',#
656
+ 4:'upperclothes',#
657
+ 5:'skirt',
658
+ 6:'pants',#
659
+ 7:'dress',#
660
+ 8:'belt',
661
+ 9:'leftshoe',#
662
+ 10:'rightshoe',#
663
+ 11:'face',#
664
+ 12:'leftleg',#
665
+ 13:'rightleg',#
666
+ 14:'leftarm',#
667
+ 15:'rightarm',#
668
+ 16:'bag',#
669
+ 17:'scarf',#
670
+ """
671
+ task_name = 'DeepFashion_parsing'
672
+ label_mapper = np.arange(60)
673
+ left_right_pairs = None
674
+ evaluate_size = ()
675
+
676
+ def __init__(self,
677
+ ginfo,
678
+ data_path,
679
+ dataset='train',
680
+ data_use_ratio=1,
681
+ is_train=True,
682
+ cfg=None,
683
+ **kwargs):
684
+ super(DeepFashionParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,
685
+ data_use_ratio=data_use_ratio,
686
+ dataset=dataset, is_train=is_train,
687
+ cfg=cfg, **kwargs)
688
+
689
+ def _list_dirs(self, data_path):
690
+ img_list = list()
691
+ label_list = list()
692
+ name_list = list()
693
+
694
+ if self.dataset == 'train':
695
+ train_type = 'train'
696
+ elif self.dataset == 'val':
697
+ train_type = 'val'
698
+ """
699
+ - DeepFashion
700
+ -humanparsing
701
+ -JPEGImages
702
+ -SegmentationClassAug
703
+ -train_id.txt
704
+ -val_id.txt
705
+ """
706
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
707
+
708
+ with open(list_txt, 'r') as f:
709
+ data = f.readlines()
710
+ data = [d.strip() for d in data]
711
+
712
+
713
+ if self.data_use_ratio != 1:
714
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
715
+
716
+ postfix_img = '.jpg'
717
+ postfix_ann = '.png'
718
+
719
+ if train_type == 'train':
720
+ for d in data:
721
+ img_path = osp.join(data_path, f'train/image', d + postfix_img)
722
+ image_name = d
723
+ label_path = osp.join(data_path, f'train/seg',
724
+ d + postfix_ann)
725
+
726
+ img_list.append(img_path)
727
+ label_list.append(label_path)
728
+ name_list.append(image_name)
729
+
730
+ return img_list, label_list, name_list
731
+ else:
732
+ raise ValueError("not implement")
733
+
734
+
735
+ class VIPParsingDataset(Human3M6ParsingDataset):
736
+ task_name = 'VIP_parsing'
737
+ left_right_pairs = np.array(
738
+ [[14, 15], [16, 17], [18, 19]]
739
+ )
740
+
741
+ label_mapper = np.arange(60)
742
+
743
+ evaluate_size = ()
744
+
745
+ def __init__(self,
746
+ ginfo,
747
+ data_path,
748
+ dataset='train',
749
+ data_use_ratio=1,
750
+ is_train=True,
751
+ cfg=None,
752
+ **kwargs):
753
+ super(VIPParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,data_use_ratio=data_use_ratio,
754
+ dataset=dataset, is_train=is_train,
755
+ cfg=cfg, **kwargs)
756
+
757
+ def _list_dirs(self, data_path):
758
+ img_list = list()
759
+ label_list = list()
760
+ name_list = list()
761
+
762
+ if self.dataset == 'train':
763
+ train_type = 'train'
764
+ elif self.dataset == 'val':
765
+ train_type = 'val'
766
+
767
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
768
+
769
+ with open(list_txt, 'r') as f:
770
+ data = f.readlines()
771
+ data = [d.strip() for d in data]
772
+
773
+
774
+ postfix_img = '.jpg'
775
+ postfix_ann = '.png'
776
+
777
+ if self.data_use_ratio != 1:
778
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
779
+
780
+ if train_type == 'train':
781
+ for d in data:
782
+ img_path = osp.join(data_path, f'Images', d + postfix_img)
783
+ image_name = d
784
+ label_path = osp.join(data_path, f'Annotations/Category_ids',
785
+ d + postfix_ann)
786
+
787
+ img_list.append(img_path)
788
+ label_list.append(label_path)
789
+ name_list.append(image_name)
790
+
791
+ return img_list, label_list, name_list
792
+ else:
793
+ raise ValueError("not implement")
794
+
795
+ class PaperDollParsingDataset(Human3M6ParsingDataset):
796
+ """
797
+ 0:'background',
798
+ 1:'hat',
799
+ 2:'hair',
800
+ 3:'glove',
801
+ 4:'sunglasses',
802
+ 5:'upperclothes',
803
+ 6:'dress',
804
+ 7:'coat',
805
+ 8:'socks',
806
+ 9:'pants',
807
+ 10:'torsoSkin',
808
+ 11:'scarf',
809
+ 12:'skirt',
810
+ 13:'face',
811
+ 14:'leftArm',
812
+ 15:'rightArm',
813
+ 16:'leftLeg',
814
+ 17:'rightLeg',
815
+ 18:'leftShoe',
816
+ 19:'rightShoe'
817
+ """
818
+ task_name = 'PaperDoll_parsing'
819
+
820
+ left_right_pairs = np.array(
821
+ [[14, 15], [16, 17], [18, 19]]
822
+ )
823
+
824
+ label_mapper = np.arange(60)
825
+
826
+ evaluate_size = ()
827
+
828
+ def __init__(self,
829
+ ginfo,
830
+ data_path,
831
+ data_use_ratio=1,
832
+ dataset='train',
833
+ is_train=True,
834
+ cfg=None,
835
+ **kwargs):
836
+ super(PaperDollParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path, data_use_ratio=data_use_ratio,
837
+ dataset=dataset, is_train=is_train,
838
+ cfg=cfg, **kwargs)
839
+
840
+ def _list_dirs(self, data_path):
841
+ img_list = list()
842
+ label_list = list()
843
+ name_list = list()
844
+
845
+ if self.dataset == 'train':
846
+ train_type = 'train'
847
+ elif self.dataset == 'val':
848
+ train_type = 'val'
849
+ """
850
+ - PaperDoll_folder
851
+ - TrainVal_parsing_annotations/
852
+ - 0000000.png
853
+ - images
854
+ - 0000000.jpg
855
+ """
856
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
857
+
858
+ with open(list_txt, 'r') as f:
859
+ data = f.readlines()
860
+ data = [d.strip() for d in data]
861
+
862
+
863
+ postfix_img = '.jpg'
864
+ postfix_ann = '.png'
865
+
866
+ if self.data_use_ratio != 1:
867
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
868
+
869
+ for d in data:
870
+ img_path = osp.join(data_path, 'images', d + postfix_img)
871
+ image_name = d
872
+ label_path = osp.join(data_path, 'TrainVal_parsing_annotations/', d + postfix_ann)
873
+
874
+ img_list.append(img_path)
875
+ label_list.append(label_path)
876
+ name_list.append(image_name)
877
+
878
+ return img_list, label_list, name_list
879
+
880
+
881
+ class FashionPediaParsingDataset(Human3M6ParsingDataset):
882
+ task_name = 'FashionPedia_parsing'
883
+
884
+ label_mapper = np.arange(60)
885
+
886
+ evaluate_size = ()
887
+
888
+ def __init__(self,
889
+ ginfo,
890
+ data_path,
891
+ data_use_ratio=1,
892
+ dataset='train',
893
+ is_train=True,
894
+ cfg=None,
895
+ **kwargs):
896
+ super(FashionPediaParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,
897
+ data_use_ratio=data_use_ratio,
898
+ dataset=dataset, is_train=is_train,
899
+ cfg=cfg, **kwargs)
900
+
901
+ def _list_dirs(self, data_path):
902
+ img_list = list()
903
+ label_list = list()
904
+ name_list = list()
905
+
906
+ if self.dataset == 'train':
907
+ train_type = 'train'
908
+ elif self.dataset == 'val':
909
+ train_type = 'val'
910
+
911
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
912
+
913
+ with open(list_txt, 'r') as f:
914
+ data = f.readlines()
915
+ data = [d.strip() for d in data]
916
+
917
+
918
+ postfix_img = '.jpg'
919
+ postfix_ann = '.png'
920
+
921
+ if self.data_use_ratio != 1:
922
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
923
+
924
+ if train_type == 'train':
925
+ for d in data:
926
+ img_path = osp.join(data_path, 'train/', d + postfix_img)
927
+ image_name = d
928
+ label_path = osp.join(data_path, 'train_annotation/', d + postfix_ann)
929
+
930
+ img_list.append(img_path)
931
+ label_list.append(label_path)
932
+ name_list.append(image_name)
933
+
934
+ return img_list, label_list, name_list
935
+
936
+ elif train_type == 'val':
937
+ for d in data:
938
+ img_path = osp.join(data_path, 'test/', d + postfix_img)
939
+ image_name = d
940
+ label_path = osp.join(data_path, 'test_annotation/', d + postfix_ann)
941
+
942
+ img_list.append(img_path)
943
+ label_list.append(label_path)
944
+ name_list.append(image_name)
945
+
946
+ return img_list, label_list, name_list
947
+ else:
948
+ raise
949
+
950
+
951
+ class ModaNetParsingDataset(Human3M6ParsingDataset):
952
+ """
953
+ modanet_par = {
954
+ 0: 'Background',
955
+ 1: 'Bag',
956
+ 2: 'Belt',
957
+ 3: 'Boots',
958
+ 4: 'Footwear',
959
+ 5: 'Outer',
960
+ 6: 'Dress',
961
+ 7: 'Sunglasses',
962
+ 8: 'Pants',
963
+ 9: 'Top',
964
+ 10: 'Shorts',
965
+ 11: 'Skirt',
966
+ 12: 'Headwear',
967
+ 13: 'Scarf & Tie'
968
+ }
969
+ """
970
+
971
+ task_name = 'ModaNet_parsing'
972
+ label_mapper = np.arange(60)
973
+ left_right_pairs = None
974
+ evaluate_size = ()
975
+
976
+ def __init__(self,
977
+ ginfo,
978
+ data_path,
979
+ dataset='train',
980
+ data_use_ratio=1,
981
+ is_train=True,
982
+ cfg=None,
983
+ **kwargs):
984
+ super(ModaNetParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path,data_use_ratio=data_use_ratio,
985
+ dataset=dataset, is_train=is_train,
986
+ cfg=cfg, **kwargs)
987
+
988
+ def _list_dirs(self, data_path):
989
+ img_list = list()
990
+ label_list = list()
991
+ name_list = list()
992
+ # image_dir = osp.join(data_path, 'protocal_1', 'rgb')
993
+ # label_dir = osp.join(data_path, 'protocal_1', 'seg')
994
+
995
+ if self.dataset == 'train':
996
+ train_type = 'train'
997
+ elif self.dataset == 'val':
998
+ train_type = 'val'
999
+
1000
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
1001
+
1002
+ with open(list_txt, 'r') as f:
1003
+ data = f.readlines()
1004
+ data = [d.strip() for d in data]
1005
+
1006
+
1007
+ if self.data_use_ratio != 1:
1008
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
1009
+
1010
+ postfix_img = '.jpg'
1011
+ postfix_ann = '.png'
1012
+
1013
+ if train_type == 'train':
1014
+ for d in data:
1015
+ img_path = osp.join(data_path, f'images', d + postfix_img)
1016
+ image_name = d
1017
+ label_path = osp.join(data_path, f'seg',
1018
+ d + postfix_ann)
1019
+
1020
+ img_list.append(img_path)
1021
+ label_list.append(label_path)
1022
+ name_list.append(image_name)
1023
+
1024
+ return img_list, label_list, name_list
1025
+ else:
1026
+ raise ValueError("not implement")
1027
+
1028
+
1029
+ class MHPParsingDataset(Human3M6ParsingDataset):
1030
+ task_name = 'MHP_parsing'
1031
+ label_mapper = np.arange(60)
1032
+ left_right_pairs = None
1033
+ evaluate_size = ()
1034
+
1035
+ def __init__(self,
1036
+ ginfo,
1037
+ data_path,
1038
+ dataset='train',
1039
+ data_use_ratio=1,
1040
+ is_train=True,
1041
+ cfg=None,
1042
+ **kwargs):
1043
+ super(MHPParsingDataset, self).__init__(ginfo=ginfo, data_path=data_path, data_use_ratio=data_use_ratio,
1044
+ dataset=dataset, is_train=is_train,
1045
+ cfg=cfg, **kwargs)
1046
+
1047
+ def _list_dirs(self, data_path):
1048
+ img_list = list()
1049
+ label_list = list()
1050
+ name_list = list()
1051
+
1052
+ if self.dataset == 'train':
1053
+ train_type = 'train'
1054
+ elif self.dataset == 'val':
1055
+ train_type = 'val'
1056
+
1057
+ list_txt = osp.join(data_path, f'{train_type}_id.txt')
1058
+
1059
+ with open(list_txt, 'r') as f:
1060
+ data = f.readlines()
1061
+ data = [d.strip() for d in data]
1062
+
1063
+
1064
+ if self.data_use_ratio != 1:
1065
+ data = random.sample(data, int(len(data) * self.data_use_ratio))
1066
+
1067
+ postfix_img = '.jpg'
1068
+ postfix_ann = '.png'
1069
+
1070
+ if train_type == 'train':
1071
+ for d in data:
1072
+ img_path = osp.join(data_path, f'images/', d + postfix_img)
1073
+ image_name = d
1074
+ label_path = osp.join(data_path, f'processed_label/',
1075
+ d + postfix_ann)
1076
+
1077
+ img_list.append(img_path)
1078
+ label_list.append(label_path)
1079
+ name_list.append(image_name)
1080
+
1081
+ return img_list, label_list, name_list
1082
+ else:
1083
+ raise ValueError("not implement")
1084
+
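A minimal sketch of the partial-label mask built in the `label_mask` branch of `Human3M6ParsingDataset.__getitem__` above; the class count, present classes, and number of masked entries are illustrative:

    # Mirrors the mask construction: 1 = class present, 0 = absent, -1 = masked/unknown.
    import random
    import torch

    num_classes = 6
    classes_present = [0, 2, 5]                   # classes found in this ground-truth map

    mask = torch.zeros(num_classes, dtype=torch.int64)
    mask[classes_present] = 1
    unk_mask_indices = random.sample(range(num_classes), k=3)   # stand-in for get_unk_mask_indices
    mask.scatter_(0, torch.tensor(unk_mask_indices, dtype=torch.long), -1)
    # mask now mixes {1, 0, -1} and is returned as dataset_dict['mask'].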
core/data/datasets/images/pedattr_dataset.py ADDED
@@ -0,0 +1,665 @@
1
+ import os
2
+ import time
3
+ import pickle
4
+ import random
5
+ from easydict import EasyDict as edict
6
+ import numpy as np
7
+ import torch.utils.data as data
8
+ from PIL import Image
9
+ from core.data.transforms.pedattr_transforms import PedAttrAugmentation, PedAttrTestAugmentation, PedAttrRandomAugmentation
10
+ import torch.distributed as dist
11
+
12
+
13
+ __all__ = ['AttrDataset', 'MultiAttrDataset']
14
+
15
+ def merge_pedattr_datasets(data_path_list, root_path_list, dataset_name_list, train,
16
+ data_use_ratio, text_label_return, select_data, ignore_other_attrs=True):
17
+ total_img_id = []
18
+ total_attr_num = 0
19
+ total_img_num = 0
20
+ total_attr_begin = []
21
+ total_attr_end = []
22
+ total_img_begin = []
23
+ total_img_end = []
24
+ total_text_dict = {}
25
+ attr_begin = []
26
+ attr_end = []
27
+
28
+ for data_path, root_path, dataset_name in zip(data_path_list, root_path_list, dataset_name_list):
29
+ assert dataset_name in ['peta', 'PA_100k', 'rap', 'rap2', 'uavhuman', 'HARDHC',
30
+ 'ClothingAttribute', 'parse27k', 'duke', 'market','lup_0_200w', 'lup_0_600w', 'lup_600_1200w'], \
31
+ 'dataset name {} does not exist'.format(dataset_name)
32
+
33
+
34
+ with open(data_path, 'rb') as f:
35
+ dataset_info = pickle.load(f)
36
+ dataset_info = edict(dataset_info)
37
+ img_id = dataset_info.image_name
38
+ attr_label = dataset_info.label
39
+
40
+ if train:
41
+ split = 'trainval'
42
+ else:
43
+ split = 'test'
44
+
45
+ attr_id = dataset_info.attr_name
46
+ attr_num = len(attr_id)
47
+
48
+ total_attr_begin.append(total_attr_num)
49
+ total_attr_num = total_attr_num + attr_num
50
+ total_attr_end.append(total_attr_num)
51
+
52
+ if select_data is None or dataset_name == select_data:
53
+ assert split in dataset_info.partition.keys(), f'split {split} does not exist'
54
+ img_idx = dataset_info.partition[split]
55
+
56
+ if isinstance(img_idx, list):
57
+ img_idx = img_idx[0] # default partition 0
58
+
59
+ if data_use_ratio != 1:
60
+ img_idx = random.sample(list(img_idx), int(len(img_idx) * data_use_ratio))
61
+
62
+ img_num = len(img_idx)
63
+ img_idx = np.array(img_idx)
64
+
65
+ img_id = [os.path.join(root_path, img_id[i]) for i in img_idx]
66
+ label = attr_label[img_idx]
67
+
68
+
69
+ total_img_begin.append(total_img_num)
70
+ total_img_num = total_img_num + len(img_id)
71
+ total_img_end.append(total_img_num)
72
+ else:
73
+ # When testing on a single dataset, the requested split may not exist in the other datasets, so we register
+ # a fake, empty split (0 images) for them to keep the merging code running.
+ # TODO: find a cleaner way to handle this, e.g. only loop over the selected dataset.
76
+ img_id = []
77
+ label = []
78
+ img_num = 0
79
+ total_img_begin.append(total_img_num)
80
+ total_img_num = total_img_num + len(img_id)
81
+ total_img_end.append(total_img_num)
82
+
83
+ infilling_class = -1 if ignore_other_attrs else 0
84
+ total_label = np.full((total_img_num, total_attr_num), infilling_class, dtype=np.int32)
85
+ select_attr_begin = 0
86
+ select_attr_end = total_attr_num
87
+
88
+ for index, (data_path, root_path, dataset_name) in enumerate(zip(data_path_list, root_path_list, dataset_name_list)):
89
+
90
+ assert dataset_name in ['peta', 'PA_100k', 'rap', 'rap2', 'uavhuman', 'HARDHC',
91
+ 'ClothingAttribute', 'parse27k', 'duke', 'market','lup_0_200w', 'lup_0_600w', 'lup_600_1200w'], \
92
+ 'dataset name {} does not exist'.format(dataset_name)
93
+ with open(data_path, 'rb') as f:
94
+ dataset_info = pickle.load(f)
95
+ dataset_info = edict(dataset_info)
96
+
97
+ img_id = dataset_info.image_name
98
+ attr_label = dataset_info.label
99
+
100
+ if train:
101
+ split = 'trainval'
102
+ else:
103
+ split = 'test'
104
+
105
+ if not train and dataset_name != select_data:
106
+ continue
107
+
108
+ assert split in dataset_info.partition.keys(), f'split {split} does not exist'
109
+
110
+ attr_id = dataset_info.attr_name
111
+ attr_num = len(attr_id)
112
+
113
+ img_idx = dataset_info.partition[split]
114
+
115
+ if isinstance(img_idx, list):
116
+ img_idx = img_idx[0] # default partition 0
117
+
118
+ if data_use_ratio != 1:
119
+ img_idx = random.sample(list(img_idx), int(len(img_idx) * data_use_ratio))
120
+
121
+ img_num = len(img_idx)
122
+ img_idx = np.array(img_idx)
123
+
124
+ img_id = [os.path.join(root_path, img_id[i]) for i in img_idx]
125
+ label = attr_label[img_idx]
126
+ # import pdb;pdb.set_trace()
127
+ if text_label_return:
128
+ for idx in range(attr_num):
129
+ total_text_dict[total_attr_begin[index] + idx] = eval(f"{dataset_name}_attr_name")[idx]
130
+
131
+ if not train:
132
+ if dataset_name == select_data:
133
+ total_label[total_img_begin[index]: total_img_end[index], total_attr_begin[index]: total_attr_end[index]] = label
134
+ total_img_id.extend(img_id)
135
+ attr_begin.extend([total_attr_begin[index] for i in img_idx])
136
+ attr_end.extend([total_attr_end[index] for i in img_idx])
137
+ else:
138
+ total_label[total_img_begin[index]: total_img_end[index], total_attr_begin[index]: total_attr_end[index]] = label
139
+ total_img_id.extend(img_id)
140
+ attr_begin.extend([total_attr_begin[index] for i in img_idx])
141
+ attr_end.extend([total_attr_end[index] for i in img_idx])
142
+
143
+ # import pdb;pdb.set_trace()
144
+ return total_img_id, total_label, total_text_dict, attr_begin, attr_end
145
+
146
+
147
+
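# Editor's sketch (not part of the original file): merge_pedattr_datasets() above lays the
# per-dataset labels out block-diagonally in one big (total_img_num, total_attr_num) matrix,
# filling every attribute a dataset does not annotate with -1 (or 0 when ignore_other_attrs
# is False). A toy numpy illustration of that layout with two fake datasets:
import numpy as np

_toy_sizes = [(3, 4), (2, 5)]                     # (num_images, num_attrs) per fake dataset
_total_imgs = sum(n for n, _ in _toy_sizes)
_total_attrs = sum(a for _, a in _toy_sizes)
_merged = np.full((_total_imgs, _total_attrs), -1, dtype=np.int32)
_row = _col = 0
for _n, _a in _toy_sizes:
    _merged[_row:_row + _n, _col:_col + _a] = 1   # stand-in for that dataset's real labels
    _row += _n
    _col += _a
# _merged has shape (5, 9): rows 0-2 only carry labels in columns 0-3, rows 3-4 only in
# columns 4-8, and -1 marks "attribute not annotated for this image".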
148
+ class MultiAttrDataset(data.Dataset):
149
+
150
+ def __init__(self, ginfo, augmentation, task_spec, train=True, data_use_ratio=1, text_label_return=False,
151
+ select_data=None, ignore_other_attrs=True,
152
+ **kwargs):
153
+ data_path = task_spec.data_path
154
+ root_path = task_spec.root_path
155
+ dataset_name = task_spec.dataset
156
+ # import pdb; pdb.set_trace()
157
+ self.rank = dist.get_rank()
158
+ self.train = train
159
+ self.img_id, self.label, self.text_dict, self.attr_begin, self.attr_end = \
160
+ merge_pedattr_datasets(data_path, root_path, dataset_name, train,
161
+ data_use_ratio, text_label_return, select_data, ignore_other_attrs)
162
+ height = augmentation.height
163
+ width = augmentation.width
164
+ self.img_num = len(self.img_id)
165
+
166
+ if train:
167
+ self.transform = PedAttrAugmentation(height, width)
168
+ if augmentation.get('use_random_aug', False):
169
+ self.transform = PedAttrRandomAugmentation(height, width, \
170
+ augmentation.use_random_aug.m, augmentation.use_random_aug.n)
171
+ else:
172
+ self.transform = PedAttrTestAugmentation(height, width)
173
+
174
+
175
+ self.task_name = ginfo.task_name
176
+
177
+ def __getitem__(self, index):
178
+ return self.read_one(index)
179
+
180
+ def __len__(self):
181
+ return len(self.img_id)
182
+
183
+ def read_one(self, idx=None):
184
+ if idx is None:
185
+ idx = np.random.randint(self.img_num)
186
+
187
+ imgname, gt_label = self.img_id[idx], self.label[idx]
188
+ imgpath = imgname
189
+
190
+ try:
191
+ img = Image.open(imgpath).convert('RGB')
192
+ if self.transform is not None:
193
+ img = self.transform(img)
194
+
195
+ gt_label = gt_label.astype(np.float32)
196
+
197
+ output = {'image': img, 'label': gt_label, 'filename': imgname, 'attr_begin': self.attr_begin[idx], 'attr_end': self.attr_end[idx]}
199
+
200
+ return output
201
+ except Exception:
202
+ print('{} load failed'.format(imgpath))
203
+ return self.read_one()
204
+
205
+ def __repr__(self):
206
+ return self.__class__.__name__ + \
207
+ f' rank: {self.rank} task: {self.task_name} mode: {"training" if self.train else "inference"} ' \
208
+ f'dataset_len:{len(self.img_id)} augmentation: {self.transform}'
209
+
210
+ rap2_attr_name = {
211
+ 0: {0:'without a bald head',1:'with a bald head'},
212
+ 1: {0:'with short hair',1:'with long hair'},
213
+ 2: {0:'with non-black hair',1:'with black hair'},
214
+ 3: {0:'without a hat',1:'with a hat'},
215
+ 4: {0:'without glasses',1:'with glasses'},
216
+ 5: {0:'without a shirt',1:'with a shirt'},
217
+ 6: {0:'without a sweater',1:'with a sweater'},
218
+ 7: {0:'without a vest',1:'with a vest'},
219
+ 8: {0:'without a t-shirt',1:'with a t-shirt'},
220
+ 9: {0:'without cotton',1:'with cotton'},
221
+ 10: {0:'without a jacket',1:'with a jacket'},
222
+ 11: {0:'without formal wear',1:'with formal wear'},
223
+ 12: {0:'without tight clothes',1:'with tight clothes'},
224
+ 13: {0:'without short sleeves',1:'with short sleeves'},
225
+ 14: {0:'without other upper-body clothing',1:'with other upper-body clothing'},
226
+ 15: {0:'without long trousers',1:'with long trousers'},
227
+ 16: {0:'without a skirt',1:'with a skirt'},
228
+ 17: {0:'without a short skirt',1:'with a short skirt'},
229
+ 18: {0:'without a dress',1:'with a dress'},
230
+ 19: {0:'without jeans',1:'with jeans'},
231
+ 20: {0:'without tight trousers',1:'with tight trousers'},
232
+ 21: {0:'without leather shoes',1:'with leather shoes'},
233
+ 22: {0:'without sport shoes',1:'with sport shoes'},
234
+ 23: {0:'without boots',1:'with boots'},
235
+ 24: {0:'without cloth shoes',1:'with cloth shoes'},
236
+ 25: {0:'without casual shoes',1:'with casual shoes'},
237
+ 26: {0:'without other shoes',1:'with other shoes'},
238
+ 27: {0:'without a backpack',1:'with a backpack'},
239
+ 28: {0:'without a shoulder bag',1:'with a shoulder bag'},
240
+ 29: {0:'without a handbag',1:'with a handbag'},
241
+ 30: {0:'without a box',1:'with a box'},
242
+ 31: {0:'without a plastic bag',1:'with a plastic bag'},
243
+ 32: {0:'without a paper bag',1:'with a paper bag'},
244
+ 33: {0:'without a hand trunk',1:'with a hand trunk'},
245
+ 34: {0:'without other attachments',1:'with other attachments'},
246
+ 35: {0:'age greater than 16',1:'age less than or equal to 16'},
247
+ 36: {0:'age less than 17 or greater than 30',1:'age between 17 and 30'},
248
+ 37: {0:'age less than 31 or greater than 45',1:'age between 31 and 45'},
249
+ 38: {0:'age less than 46 or greater than 60',1:'age between 46 and 60'},
250
+ 39: {0:'male',1:'female', 2:'gender unknown'},
251
+ 40: {0:'without excess body fat',1:'with excess body fat'},
252
+ 41: {0:'without normal body shape',1:'with normal body shape'},
253
+ 42: {0:'without thin body shape',1:'with thin body shape'},
254
+ 43: {0:'not a customer',1:'is a customer'},
255
+ 44: {0:'not an employee',1:'is an employee'},
256
+ 45: {0:'not calling',1:'calling'},
257
+ 46: {0:'not talking',1:'talking'},
258
+ 47: {0:'not gathering',1:'gathering'},
259
+ 48: {0:'not holding anything',1:'holding something'},
260
+ 49: {0:'not pushing anything',1:'pushing something'},
261
+ 50: {0:'not pulling anything',1:'pulling something'},
262
+ 51: {0:'not carrying anything in arms',1:'carrying something in arms'},
263
+ 52: {0:'not carrying anything in hands',1:'carrying something in hands'},
264
+ 53: {0:'no other actions',1:'performing other actions'}
265
+ }
266
+
267
+ PA_100k_attr_name = {
268
+ 0: {0:'without a hat',1:'with a hat'},
269
+ 1: {0:'without glasses',1:'with glasses'},
270
+ 2: {0:'without short sleeves',1:'with short sleeves'},
271
+ 3: {0:'without long sleeves',1:'with long sleeves'},
272
+ 4: {0:'without stripe upper-clothes',1:'with stripe upper-clothes'},
273
+ 5: {0:'without logo upper-clothes',1:'with logo upper-clothes'},
274
+ 6: {0:'without plaid upper-clothes',1:'with plaid upper-clothes'},
275
+ 7: {0:'without splice upper-clothes',1:'with splice upper-clothes'},
276
+ 8: {0:'without stripe lower-clothes',1:'with stripe lower-clothes'},
277
+ 9: {0:'without pattern lower-clothes',1:'with pattern lower-clothes'},
278
+ 10: {0:'without long coat',1:'with long coat'},
279
+ 11: {0:'without long trousers',1:'with long trousers'},
280
+ 12: {0:'without short trousers',1:'with short trousers'},
281
+ 13: {0:'without skirt or dress',1:'with skirt or dress'},
282
+ 14: {0:'without boots',1:'with boots'},
283
+ 15: {0:'without a handbag',1:'with a handbag'},
284
+ 16: {0:'without a shoulder bag',1:'with a shoulder bag'},
285
+ 17: {0:'without a backpack',1:'with a backpack'},
286
+ 18: {0:'not hold objects in front',1:'hold objects in front'},
287
+ 19: {0:'age less than or equal to 60',1:'age greater than 60'},
288
+ 20: {0:'age less than 18 or greater than 60',1:'age between 18 and 60'},
289
+ 21: {0:'age greater than or equal to 18',1:'age less than 18'},
290
+ 22: {0:'male',1:'female', 2:'gender unknown'},
291
+ 23: {0:'not in the front position',1:'in the front position'},
292
+ 24: {0:'not in the side position',1:'in the side position'},
293
+ 25: {0:'not in the back position',1:'in the back position'},
294
+ }
295
+
296
+ HARDHC_attr_name = {
297
+ 0: {0:'female', 1:'male', -1:'gender unknown'},
298
+ 1: {0:'with short hair',1:'with long hair'},
299
+ 2: {0:'without sunglass',1:'with sunglass'},
300
+ 3: {0:'without a hat',1:'with a hat'},
301
+ 4: {0:'without T-skirt',1:'with T-skirt'},
302
+ 5: {0:'without long sleeves',1:'with long sleeves'},
303
+ 6: {0:'without formal clothes',1:'with formal clothes'},
304
+ 7: {0:'without short trousers',1:'with short trousers'},
305
+ 8: {0:'without jeans',1:'with jeans'},
306
+ 9: {0:'without long pants',1:'with long pants'},
307
+ 10: {0:'without skirt',1:'with skirt'},
308
+ 11: {0:'without face mask',1:'with face mask'},
309
+ 12: {0:'without logo clothes',1:'with logo clothes'},
310
+ 13: {0:'without stripe clothes',1:'with stripe clothes'},
311
+ }
312
+
313
+ parse27k_attr_name = {
314
+ 0: {0:'without a bald head',1:'with a bald head'},
315
+ 1: {0:'with short hair',1:'with long hair'},
316
+ 2: {0:'with non-black hair',1:'with black hair'},
317
+ 3: {0:'without a hat',1:'with a hat'},
318
+ 4: {0:'without glasses',1:'with glasses'},
319
+ 5: {0:'without a shirt',1:'with a shirt'},
320
+ 6: {0:'without a sweater',1:'with a sweater'},
321
+ 7: {0:'without a vest',1:'with a vest'},
322
+ 8: {0:'without a t-shirt',1:'with a t-shirt'},
323
+ 9: {0:'without cotton',1:'with cotton'},
324
+ 10: {0:'without a jacket',1:'with a jacket'},
325
+ 11: {0:'without formal wear',1:'with formal wear'},
326
+ 12: {0:'without tight clothes',1:'with tight clothes'},
327
+ 13: {0:'without short sleeves',1:'with short sleeves'},
328
+ 14: {0:'without other upper-body clothing',1:'with other upper-body clothing'},
329
+ 15: {0:'without long trousers',1:'with long trousers'},
330
+ 16: {0:'without a skirt',1:'with a skirt'},
331
+ 17: {0:'without a short skirt',1:'with a short skirt'},
332
+ 18: {0:'without a dress',1:'with a dress'},
333
+ 19: {0:'without jeans',1:'with jeans'},
334
+ 20: {0:'without tight trousers',1:'with tight trousers'},
335
+ 21: {0:'without leather shoes',1:'with leather shoes'},
336
+ 22: {0:'without sport shoes',1:'with sport shoes'},
337
+ 23: {0:'without boots',1:'with boots'},
338
+ 24: {0:'without cloth shoes',1:'with cloth shoes'},
339
+ 25: {0:'without casual shoes',1:'with casual shoes'},
340
+ 26: {0:'without other shoes',1:'with other shoes'},
341
+ 27: {0:'without a backpack',1:'with a backpack'},
342
+ 28: {0:'without a shoulder bag',1:'with a shoulder bag'},
343
+ 29: {0:'without a handbag',1:'with a handbag'},
344
+ 30: {0:'without a box',1:'with a box'},
345
+ 31: {0:'without a plastic bag',1:'with a plastic bag'},
346
+ 32: {0:'without a paper bag',1:'with a paper bag'},
347
+ 33: {0:'without a hand trunk',1:'with a hand trunk'},
348
+ 34: {0:'without other attachments',1:'with other attachments'},
349
+ 35: {0:'age greater than 16',1:'age less than or equal to 16'},
350
+ 36: {0:'age less than 17 or greater than 30',1:'age between 17 and 30'},
351
+ 37: {0:'age less than 31 or greater than 45',1:'age between 31 and 45'},
352
+ 38: {0:'age less than 46 or greater than 60',1:'age between 46 and 60'},
353
+ 39: {0:'male',1:'female', 2:'gender unknown'},
354
+ 40: {0:'without excess body fat',1:'with excess body fat'},
355
+ 41: {0:'without normal body shape',1:'with normal body shape'},
356
+ 42: {0:'without thin body shape',1:'with thin body shape'},
357
+ 43: {0:'not a customer',1:'is a customer'}
358
+ }
359
+
360
+ uavhuman_attr_name = {
361
+ 0: {0:'female',1:'male'},
362
+ 1: {0:'without red backpack',1:'with red backpack'},
363
+ 2: {0:'without black backpack',1:'with black backpack'},
364
+ 3: {0:'without green backpack',1:'with green backpack'},
365
+ 4: {0:'without yellow backpack',1:'with yellow backpack'},
366
+ 5: {0:'without other backpack',1:'with other backpack'},
367
+ 6: {0:'without red hat',1:'with red hat'},
368
+ 7: {0:'without black hat',1:'with black hat'},
369
+ 8: {0:'without yellow hat',1:'with yellow hat'},
370
+ 9: {0:'without white hat',1:'with white hat'},
371
+ 10: {0:'without other hat',1:'with other hat'},
372
+ 11: {0:'without red upper-clothes',1:'with red upper-clothes'},
373
+ 12: {0:'without black upper-clothes',1:'with black upper-clothes'},
374
+ 13: {0:'without blue upper-clothes',1:'with blue upper-clothes'},
375
+ 14: {0:'without green upper-clothes',1:'with green upper-clothes'},
376
+ 15: {0:'without multicolor upper-clothes',1:'with multicolor upper-clothes'},
377
+ 16: {0:'without grey upper-clothes',1:'with grey upper-clothes'},
378
+ 17: {0:'without white upper-clothes',1:'with white upper-clothes'},
379
+ 18: {0:'without yellow upper-clothes',1:'with yellow upper-clothes'},
380
+ 19: {0:'without dark brown upper-clothes',1:'with dark brown upper-clothes'},
381
+ 20: {0:'without purple upper-clothes',1:'with purple upper-clothes'},
382
+ 21: {0:'without pink upper-clothes',1:'with pink upper-clothes'},
383
+ 22: {0:'without other upper-clothes',1:'with other upper-clothes'},
384
+ 23: {0:'without long upper-clothes style',1:'with long upper-clothes style'},
385
+ 24: {0:'without short upper-clothes style',1:'with short upper-clothes style'},
386
+ 25: {0:'without skirt upper-clothes style',1:'with skirt upper-clothes style'},
387
+ 26: {0:'without other upper-clothes style',1:'with other upper-clothes style'},
388
+ 27: {0:'without red lower clothes',1:'with red lower clothes'},
389
+ 28: {0:'without black lower clothes',1:'with black lower clothes'},
390
+ 29: {0:'without blue lower clothes',1:'with blue lower clothes'},
391
+ 30: {0:'without green lower clothes',1:'with green lower clothes'},
392
+ 31: {0:'without multicolor lower clothes',1:'with multicolor lower clothes'},
393
+ 32: {0:'without grey lower clothes',1:'with grey lower clothes'},
394
+ 33: {0:'without white lower-clothes',1:'with white lower-clothes'},
395
+ 34: {0:'without yellow lower-clothes',1:'with yellow lower-clothes'},
396
+ 35: {0:'without dark brown lower-clothes',1:'with dark brown lower-clothes'},
397
+ 36: {0:'without purple lower-clothes',1:'with purple lower-clothes'},
398
+ 37: {0:'without pink lower-clothes',1:'with pink lower-clothes'},
399
+ 38: {0:'without other lower-clothes',1:'with other lower-clothes'},
400
+ 39: {0:'without long lower-clothes style',1:'with long lower-clothes style'},
401
+ 40: {0:'without short lower-clothes style',1:'with short lower-clothes style'},
402
+ 41: {0:'without skirt lower-clothes style',1:'with skirt lower-clothes style'},
403
+ 42: {0:'without other lower-clothes style',1:'with other lower-clothes style'}
404
+ }
405
+
406
+ market_attr_name = {
407
+ 0: {0:'without a backpack',1:'with a backpack'},
408
+ 1: {0:'without a bag',1:'with a bag'},
409
+ 2: {0:'without a handbag',1:'with a handbag'},
410
+ 3: {0:'without black lower-clothes',1:'with black lower-clothes'},
411
+ 4: {0:'without blue lower-clothes',1:'with blue lower-clothes'},
412
+ 5: {0:'without brown lower-clothes',1:'with brown lower-clothes'},
413
+ 6: {0:'without gray lower-clothes',1:'with gray lower-clothes'},
414
+ 7: {0:'without green lower-clothes',1:'with green lower-clothes'},
415
+ 8: {0:'without pink lower-clothes',1:'with pink lower-clothes'},
416
+ 9: {0:'without purple lower-clothes',1:'with purple lower-clothes'},
417
+ 10: {0:'without white lower-clothes',1:'with white lower-clothes'},
418
+ 11: {0:'without yellow lower-clothes',1:'with yellow lower-clothes'},
419
+ 12: {0:'without black upper-clothes',1:'with black upper-clothes'},
420
+ 13: {0:'without blue upper-clothes',1:'with blue upper-clothes'},
421
+ 14: {0:'without green upper-clothes',1:'with green upper-clothes'},
422
+ 15: {0:'without gray upper-clothes',1:'with gray upper-clothes'},
423
+ 16: {0:'without purple upper-clothes',1:'with purple upper-clothes'},
424
+ 17: {0:'without red upper-clothes',1:'with red upper-clothes'},
425
+ 18: {0:'without white upper-clothes',1:'with white upper-clothes'},
426
+ 19: {0:'without yellow upper-clothes',1:'with yellow upper-clothes'},
427
+ 20: {0:'with dress',1:'with pants'},
428
+ 21: {0:'with long lower body clothing',1:'with short lower body clothing'},
429
+ 22: {0:'with long sleeve upper body clothing',1:'with short upper body clothing'},
430
+ 23: {0:'with short hair',1:'with long hair'},
431
+ 24: {0:'without a hat',1:'with a hat'},
432
+ 25: {0:'male',1:'female'},
433
+ 26: {0:'not a young person',1:'a young person'},
434
+ 27: {0:'not a teenager',1:'a teenager'},
435
+ 28: {0:'not an adult',1:'an adult'},
436
+ 29: {0:'not an old person',1:'an old person'}
437
+ }
438
+ # the peta attribute names below still have some bugs
439
+ peta_attr_name = {
440
+ 0: {0:'without hat accessory',1:'with hat accessory'},
441
+ 1: {0:'without muffler accessory',1:'with muffler accessory'},
442
+ 2: {0:'with accessory',1:'with nothing accessory'},
443
+ 3: {0:'without sunglasses accessory',1:'with sunglasses accessory'},
444
+ 4: {0:'with short hair',1:'with long hair'},
445
+ 5: {0:'without casual upper body wear',1:'with casual upper body wear'},
446
+ 6: {0:'without formal upper body wear',1:'with formal upper body wear'},
447
+ 7: {0:'without jacket upper body wear',1:'with jacket upper body wear'},
448
+ 8: {0:'without logo upper body wear',1:'with logo upper body wear'},
449
+ 9: {0:'without plaid upper body wear',1:'with plaid upper body wear'},
450
+ 10: {0:'without short sleeve upper body wear',1:'with short sleeve upper body wear'},
451
+ 11: {0:'without thin stripes upper body wear',1:'with thin stripes upper body wear'},
452
+ 12: {0:'without t-shirt upper body wear',1:'with t-shirt upper body wear'},
453
+ 13: {0:'without other upper body wear',1:'with other upper body wear'},
454
+ 14: {0:'without vneck upper body wear',1:'with vneck upper body wear'},
455
+ 15: {0:'without casual lower body wear',1:'with casual lower body wear'},
456
+ 16: {0:'without formal lower body wear',1:'with formal lower body wear'},
457
+ 17: {0:'without jeans lower body wear',1:'with jeans lower body wear'},
458
+ 18: {0:'without shorts lower body wear',1:'with shorts lower body wear'},
459
+ 19: {0:'without shortskirt lower body wear',1:'with shortskirt lower body wear'},
460
+ 20: {0:'without trousers lower body wear',1:'with trousers lower body wear'},
461
+ 21: {0: 'without leather shoes', 1: 'with leather shoes'},
462
+ 22: {0: 'without sandals', 1: 'with sandals'},
463
+ 23: {0: 'without shoes', 1: 'with shoes'},
464
+ 24: {0: 'without sneaker', 1: 'with sneaker'},
465
+ 25: {0: 'without carrying backpack', 1: 'carrying backpack'},
466
+ 26: {0: 'without carrying other things', 1: 'carrying other things'},
467
+ 27: {0: 'without carrying messengerbag', 1: 'carrying messengerbag'},
468
+ 28: {0: 'carrying something', 1: 'carrying nothing'},
469
+ 29: {0: 'without carrying plasticbags', 1: 'carrying plasticbags'},
470
+ 30: {0:'age greater than or equal to 30',1:'age less than 30'},
471
+ 31: {0:'age less than 31 or greater than 45',1:'age between 31 and 45'},
472
+ 32: {0:'age less than 46 or greater than 60',1:'age between 46 and 60'},
473
+ 33: {0:'age less than or equal to 60',1:'age larger than 60'},
474
+ 34: {0:'female',1:'male'}
475
+ }
476
+
477
+ duke_attr_name = {
478
+ 0: {0:'without a backpack',1:'with a backpack'},
479
+ 1: {0:'without a bag',1:'with a bag'},
480
+ 2: {0:'without boots',1:'with boots'},
481
+ 3: {0:'without black lower-clothes',1:'with black lower-clothes'},
482
+ 4: {0:'without blue lower-clothes',1:'with blue lower-clothes'},
483
+ 5: {0:'without brown lower-clothes',1:'with brown lower-clothes'},
484
+ 6: {0:'without gray lower-clothes',1:'with gray lower-clothes'},
485
+ 7: {0:'without green lower-clothes',1:'with green lower-clothes'},
486
+ 8: {0:'without red lower-clothes',1:'with red lower-clothes'},
487
+ 9: {0:'without white lower-clothes',1:'with white lower-clothes'},
488
+ 10: {0:'male',1:'female'},
489
+ 11: {0:'without a handbag',1:'with a handbag'},
490
+ 12: {0:'without a hat',1:'with a hat'},
491
+ 13: {0:'with dark shoes',1:'with light shoes'},
492
+ 14: {0:'short top clothing',1:'long top clothing'},
493
+ 15: {0:'without black upper-clothes',1:'with black upper-clothes'},
494
+ 16: {0:'without blue upper-clothes',1:'with blue upper-clothes'},
495
+ 17: {0:'without brown upper-clothes',1:'with brown upper-clothes'},
496
+ 18: {0:'without gray upper-clothes',1:'with gray upper-clothes'},
497
+ 19: {0:'without green upper-clothes',1:'with green upper-clothes'},
498
+ 20: {0:'without purple upper-clothes',1:'with purple upper-clothes'},
499
+ 21: {0:'without red upper-clothes',1:'with red upper-clothes'},
500
+ 22: {0:'without white upper-clothes',1:'with white upper-clothes'},
501
+ }
502
+
503
+ ClothingAttribute_attr_name = ['pattern_spot', 'cyan', 'brown', 'v_shape_neckline', 'round_neckline', 'other_neckline', 'no_sleevelength', 'short_sleevelength', 'long_sleevelength', 'pattern_graphics', 'gender', 'black', 'many_colors', 'white', 'pattern_floral', 'collar', 'blue', 'necktie', 'pattern_stripe', 'pattern_solid', 'gray', 'shirt_category', 'sweater_category', 't_shirt_category', 'outerwear_category', 'suit_category', 'tank_top_category', 'dress_category', 'placket', 'pattern_plaid', 'purple', 'scarf', 'green', 'yellow', 'skin_exposure', 'red']
504
+
505
+ lup_0_200w_attr_name = {
506
+ 0: {0: 'male', 1: 'female', -1:'gender unknown'},
507
+ 1: {0: 'age greater than 6', 1: "age less than or equal to 6", -1: 'age unknown'},
508
+ 2: {0: 'age less than 7 or greater than 18', 1: "age between 7 and 18", -1: 'age unknown'},
509
+ 3: {0: 'age less than 19 or greater than 65', 1: "age between 19 and 65", -1: 'age unknown'},
510
+ 4: {0: 'age less than 66', 1: "age greater than or equal to 66", -1: 'age unknown'},
511
+ 5: {0: 'with short sleeve coat', 1: 'with long sleeves', -1: 'coat length unknown'},
512
+ 6: {0: 'with shorts trousers', 1: 'with long trousers'},
513
+ 7: {0: 'without a skirt', 1:'with a skirt'},
514
+ 8: {0: 'without a pure pattern coat', 1: 'with a pure upper-clothes'},
515
+ 9: {0: 'without a stripe pattern coat', 1: 'with a stripe upper-clothes'},
516
+ 10: {0: 'without a design pattern coat', 1: 'with a design upper-clothes'},
517
+ 11: {0: 'without a joint pattern coat', 1: 'with a joint upper-clothes'},
518
+ 12: {0: 'without a lattic pattern coat', 1: 'with a lattic upper-clothes'},
519
+ 13: {0: 'without a black color trousers', 1: 'with black lower-clothes'},
520
+ 14: {0: 'without a white color trousers', 1: 'with white lower-clothes'},
521
+ 15: {0: 'without a gray color trousers', 1: 'with a gray color trousers'},
522
+ 16: {0: 'without a red color trousers', 1: 'with a red color trousers'},
523
+ 17: {0: 'without a yellow color trousers', 1: 'with a yellow color trousers'},
524
+ 18: {0: 'without a blue color trousers', 1: 'with a blue color trousers'},
525
+ 19: {0: 'without a green color trousers', 1: 'with a green color trousers'},
526
+ 20: {0: 'without a purple color trousers', 1: 'with a purple color trousers'},
527
+ 21: {0: 'without a pure pattern trousers', 1: 'with a pure lower-clothes'},
528
+ 22: {0: 'without a stripe pattern trousers', 1: 'with a stripe lower-clothes'},
529
+ 23: {0: 'without a design pattern trousers', 1: 'with a design lower-clothes'},
530
+ 24: {0: 'without a joint pattern trousers', 1: 'with a joint lower-clothes'},
531
+ 25: {0: 'without a lattic pattern trousers', 1: 'with a lattic lower-clothes'},
532
+ 26: {0: 'without a hat', 1: 'with a hat', -1: 'hat unknown'},
533
+ 27: {0: 'without a jacket', 1: 'with a jacket'},
534
+ 28: {0: 'without a sweater', 1: 'with a sweater'},
535
+ 29: {0: 'without a long coat', 1: 'with a long coat'},
536
+ 30: {0: 'without a shirt', 1: 'with a shirt'},
537
+ 31: {0: 'without a dress', 1: 'with a dress'},
538
+ 32: {0: 'without a business suit', 1: 'with a business suit'},
539
+ 33: {0: 'without a black color coat', 1: 'with a black color coat', -1:'unknown coat color'},
540
+ 34: {0: 'without a white color coat', 1: 'with a white color coat', -1:'unknown coat color'},
541
+ 35: {0: 'without a gray color coat', 1: 'with a gray color coat', -1:'unknown coat color'},
542
+ 36: {0: 'without a red color coat', 1: 'with a red color coat', -1:'unknown coat color'},
543
+ 37: {0: 'without a yellow color coat', 1: 'with a yellow color coat', -1:'unknown coat color'},
544
+ 38: {0: 'without a blue color coat', 1: 'with a blue color coat', -1:'unknown coat color'},
545
+ 39: {0: 'without a green color coat', 1: 'with a green color coat', -1:'unknown coat color'},
546
+ 40: {0: 'without a purple color coat', 1: 'with a purple color coat', -1:'unknown coat color'},
547
+ 41: {0: 'with short hair', 1: 'with long hair', -1: 'unknown hair style'},
548
+ 42: {0: 'without leather shoes', 1: 'with leather shoes'},
549
+ 43: {0: 'without boots', 1: 'with boots'},
550
+ 44: {0: 'without walking shoes', 1: 'with walking shoes'},
551
+ 45: {0: 'without sandal', 1: 'with sandal'},
552
+ 46: {0: 'without a bag', 1: 'with a bag', -1: 'unknown bag style'},
553
+ 47: {0: 'without glasses', 1: 'with glasses'},
554
+ 48: {0: 'not stand', 1: 'stand', -1: 'unknown pose'},
555
+ 49: {0: 'not sit', 1: 'sit', -1: 'unknown pose'},
556
+ 50: {0: 'not lie', 1: 'lie', -1: 'unknown pose'},
557
+ 51: {0: 'not stoop', 1: 'stoop', -1: 'unknown pose'}}
558
+
559
+ lup_0_600w_attr_name = {
560
+ 0: {0: 'male', 1: 'female', -1:'gender unknown'},
561
+ 1: {0: 'age greater than 6', 1: "age less than or equal to 6", -1: 'age unknown'},
562
+ 2: {0: 'age less than 7 or greater than 18', 1: "age between 7 and 18", -1: 'age unknown'},
563
+ 3: {0: 'age less than 19 or greater than 65', 1: "age between 19 and 65", -1: 'age unknown'},
564
+ 4: {0: 'age less than 66', 1: "age greater than or equal to 66", -1: 'age unknown'},
565
+ 5: {0: 'with short sleeve coat', 1: 'with long sleeves', -1: 'coat length unknown'},
566
+ 6: {0: 'with shorts trousers', 1: 'with long trousers'},
567
+ 7: {0: 'without a skirt', 1:'with a skirt'},
568
+ 8: {0: 'without a pure pattern coat', 1: 'with a pure upper-clothes'},
569
+ 9: {0: 'without a stripe pattern coat', 1: 'with a stripe upper-clothes'},
570
+ 10: {0: 'without a design pattern coat', 1: 'with a design upper-clothes'},
571
+ 11: {0: 'without a joint pattern coat', 1: 'with a joint upper-clothes'},
572
+ 12: {0: 'without a lattic pattern coat', 1: 'with a lattic upper-clothes'},
573
+ 13: {0: 'without a black color trousers', 1: 'with black lower-clothes'},
574
+ 14: {0: 'without a white color trousers', 1: 'with white lower-clothes'},
575
+ 15: {0: 'without a gray color trousers', 1: 'with a gray color trousers'},
576
+ 16: {0: 'without a red color trousers', 1: 'with a red color trousers'},
577
+ 17: {0: 'without a yellow color trousers', 1: 'with a yellow color trousers'},
578
+ 18: {0: 'without a blue color trousers', 1: 'with a blue color trousers'},
579
+ 19: {0: 'without a green color trousers', 1: 'with a green color trousers'},
580
+ 20: {0: 'without a purple color trousers', 1: 'with a purple color trousers'},
581
+ 21: {0: 'without a pure pattern trousers', 1: 'with a pure lower-clothes'},
582
+ 22: {0: 'without a stripe pattern trousers', 1: 'with a stripe lower-clothes'},
583
+ 23: {0: 'without a design pattern trousers', 1: 'with a design lower-clothes'},
584
+ 24: {0: 'without a joint pattern trousers', 1: 'with a joint lower-clothes'},
585
+ 25: {0: 'without a lattic pattern trousers', 1: 'with a lattic lower-clothes'},
586
+ 26: {0: 'without a hat', 1: 'with a hat', -1: 'hat unknown'},
587
+ 27: {0: 'without a jacket', 1: 'with a jacket'},
588
+ 28: {0: 'without a sweater', 1: 'with a sweater'},
589
+ 29: {0: 'without a long coat', 1: 'with a long coat'},
590
+ 30: {0: 'without a shirt', 1: 'with a shirt'},
591
+ 31: {0: 'without a dress', 1: 'with a dress'},
592
+ 32: {0: 'without a business suit', 1: 'with a business suit'},
593
+ 33: {0: 'without a black color coat', 1: 'with a black color coat', -1:'unknown coat color'},
594
+ 34: {0: 'without a white color coat', 1: 'with a white color coat', -1:'unknown coat color'},
595
+ 35: {0: 'without a gray color coat', 1: 'with a gray color coat', -1:'unknown coat color'},
596
+ 36: {0: 'without a red color coat', 1: 'with a red color coat', -1:'unknown coat color'},
597
+ 37: {0: 'without a yellow color coat', 1: 'with a yellow color coat', -1:'unknown coat color'},
598
+ 38: {0: 'without a blue color coat', 1: 'with a blue color coat', -1:'unknown coat color'},
599
+ 39: {0: 'without a green color coat', 1: 'with a green color coat', -1:'unknown coat color'},
600
+ 40: {0: 'without a purple color coat', 1: 'with a purple color coat', -1:'unknown coat color'},
601
+ 41: {0: 'with short hair', 1: 'with long hair', -1: 'unknown hair style'},
602
+ 42: {0: 'without leather shoes', 1: 'with leather shoes'},
603
+ 43: {0: 'without boots', 1: 'with boots'},
604
+ 44: {0: 'without walking shoes', 1: 'with walking shoes'},
605
+ 45: {0: 'without sandal', 1: 'with sandal'},
606
+ 46: {0: 'without a bag', 1: 'with a bag', -1: 'unknown bag style'},
607
+ 47: {0: 'without glasses', 1: 'with glasses'},
608
+ 48: {0: 'not stand', 1: 'stand', -1: 'unknown pose'},
609
+ 49: {0: 'not sit', 1: 'sit', -1: 'unknown pose'},
610
+ 50: {0: 'not lie', 1: 'lie', -1: 'unknown pose'},
611
+ 51: {0: 'not stoop', 1: 'stoop', -1: 'unknown pose'}}
612
+
613
+ lup_600_1200w_attr_name = {
614
+ 0: {0: 'male', 1: 'female', -1:'gender unknown'},
615
+ 1: {0: 'age greater than 6', 1: "age less than or equal to 6", -1: 'age unknown'},
616
+ 2: {0: 'age less than 7 or greater than 18', 1: "age between 7 and 18", -1: 'age unknown'},
617
+ 3: {0: 'age less than 19 or greater than 65', 1: "age between 19 and 65", -1: 'age unknown'},
618
+ 4: {0: 'age less than 66', 1: "age greater than or equal to 66", -1: 'age unknown'},
619
+ 5: {0: 'with short sleeve coat', 1: 'with long sleeves', -1: 'coat length unknown'},
620
+ 6: {0: 'with shorts trousers', 1: 'with long trousers'},
621
+ 7: {0: 'without a skirt', 1:'with a skirt'},
622
+ 8: {0: 'without a pure pattern coat', 1: 'with a pure upper-clothes'},
623
+ 9: {0: 'without a stripe pattern coat', 1: 'with a stripe upper-clothes'},
624
+ 10: {0: 'without a design pattern coat', 1: 'with a design upper-clothes'},
625
+ 11: {0: 'without a joint pattern coat', 1: 'with a joint upper-clothes'},
626
+ 12: {0: 'without a lattic pattern coat', 1: 'with a lattic upper-clothes'},
627
+ 13: {0: 'without a black color trousers', 1: 'with black lower-clothes'},
628
+ 14: {0: 'without a white color trousers', 1: 'with white lower-clothes'},
629
+ 15: {0: 'without a gray color trousers', 1: 'with a gray color trousers'},
630
+ 16: {0: 'without a red color trousers', 1: 'with a red color trousers'},
631
+ 17: {0: 'without a yellow color trousers', 1: 'with a yellow color trousers'},
632
+ 18: {0: 'without a blue color trousers', 1: 'with a blue color trousers'},
633
+ 19: {0: 'without a green color trousers', 1: 'with a green color trousers'},
634
+ 20: {0: 'without a purple color trousers', 1: 'with a purple color trousers'},
635
+ 21: {0: 'without a pure pattern trousers', 1: 'with a pure lower-clothes'},
636
+ 22: {0: 'without a stripe pattern trousers', 1: 'with a stripe lower-clothes'},
637
+ 23: {0: 'without a design pattern trousers', 1: 'with a design lower-clothes'},
638
+ 24: {0: 'without a joint pattern trousers', 1: 'with a joint lower-clothes'},
639
+ 25: {0: 'without a lattic pattern trousers', 1: 'with a lattic lower-clothes'},
640
+ 26: {0: 'without a hat', 1: 'with a hat', -1: 'hat unknown'},
641
+ 27: {0: 'without a jacket', 1: 'with a jacket'},
642
+ 28: {0: 'without a sweater', 1: 'with a sweater'},
643
+ 29: {0: 'without a long coat', 1: 'with a long coat'},
644
+ 30: {0: 'without a shirt', 1: 'with a shirt'},
645
+ 31: {0: 'without a dress', 1: 'with a dress'},
646
+ 32: {0: 'without a business suit', 1: 'with a business suit'},
647
+ 33: {0: 'without a black color coat', 1: 'with a black color coat', -1:'unknown coat color'},
648
+ 34: {0: 'without a white color coat', 1: 'with a white color coat', -1:'unknown coat color'},
649
+ 35: {0: 'without a gray color coat', 1: 'with a gray color coat', -1:'unknown coat color'},
650
+ 36: {0: 'without a red color coat', 1: 'with a red color coat', -1:'unknown coat color'},
651
+ 37: {0: 'without a yellow color coat', 1: 'with a yellow color coat', -1:'unknown coat color'},
652
+ 38: {0: 'without a blue color coat', 1: 'with a blue color coat', -1:'unknown coat color'},
653
+ 39: {0: 'without a green color coat', 1: 'with a green color coat', -1:'unknown coat color'},
654
+ 40: {0: 'without a purple color coat', 1: 'with a purple color coat', -1:'unknown coat color'},
655
+ 41: {0: 'with short hair', 1: 'with long hair', -1: 'unknown hair style'},
656
+ 42: {0: 'without leather shoes', 1: 'with leather shoes'},
657
+ 43: {0: 'without boots', 1: 'with boots'},
658
+ 44: {0: 'without walking shoes', 1: 'with walking shoes'},
659
+ 45: {0: 'without sandal', 1: 'with sandal'},
660
+ 46: {0: 'without a bag', 1: 'with a bag', -1: 'unknown bag style'},
661
+ 47: {0: 'without glasses', 1: 'with glasses'},
662
+ 48: {0: 'not stand', 1: 'stand', -1: 'unknown pose'},
663
+ 49: {0: 'not sit', 1: 'sit', -1: 'unknown pose'},
664
+ 50: {0: 'not lie', 1: 'lie', -1: 'unknown pose'},
665
+ 51: {0: 'not stoop', 1: 'stoop', -1: 'unknown pose'}}
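A hedged usage sketch of the MultiAttrDataset defined above, assuming torch.distributed has already been initialised and that the annotation pickle and image root below exist; every literal value (paths, input size, task name) is a placeholder, not something taken from this repository's configs.

    from easydict import EasyDict as edict
    from core.data.datasets.images.pedattr_dataset import MultiAttrDataset

    task_spec = edict(data_path=['/data/peta/annotation.pkl'],   # hypothetical paths
                      root_path=['/data/peta/images'],
                      dataset=['peta'])
    augmentation = edict(height=256, width=192)
    ginfo = edict(task_name='pedattr_example')

    dataset = MultiAttrDataset(ginfo, augmentation, task_spec, train=True)
    sample = dataset[0]
    # sample is a dict with 'image', 'label', 'filename', 'attr_begin' and 'attr_end';
    # 'label' is a float vector over the merged attribute space, with -1 for attributes
    # the sample's source dataset does not annotate.

When text_label_return=True, merge_pedattr_datasets also fills dataset.text_dict, mapping each global attribute index to the corresponding phrase dict above (e.g. peta_attr_name[idx]) via eval(f"{dataset_name}_attr_name").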
core/data/datasets/images/peddet_dataset_v2.py ADDED
@@ -0,0 +1,578 @@
1
+ import torch.utils.data as data
2
+ from PIL import ImageFile
3
+ ImageFile.LOAD_TRUNCATED_IMAGES=True
4
+
5
+ import os
6
+ import os.path
7
+
8
+ import random
9
+ import torch
10
+ import numpy as np
11
+ import copy
12
+
13
+ import time
14
+ from core.data.transforms.peddet_transforms import PedestrainDetectionAugmentation
15
+
16
+ from core.data.datasets.images.seg_dataset_dev import Instances
17
+ from typing import *
18
+ import torch.distributed as dist
19
+ from PIL import Image
20
+ import json
21
+ from pycocotools.coco import COCO
22
+
23
+ from collections import defaultdict
24
+
25
+ __all__ = ['PedestrainDetectionDataset_v2']
26
+
27
+ class PetrelCOCO(COCO):
28
+ def __init__(self, annotation_file=None, annotation=None):
29
+ """
30
+ Constructor of Microsoft COCO helper class for reading and visualizing annotations.
31
+ :param annotation_file (str): location of annotation file
32
+ :param annotation (?): partially processed annotation file
33
+ :return:
34
+ """
35
+ # load dataset
36
+ self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
37
+ self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
38
+ assert annotation_file is None or annotation is None
39
+ if annotation_file is not None:
40
+ print('loading annotations into memory...')
41
+ tic = time.time()
42
+ with open(annotation_file, 'r') as f:
43
+ dataset = json.load(f)
44
+ assert type(dataset) == dict, 'annotation file format {} not supported'.format(type(dataset))
45
+ print('Done (t={:0.2f}s)'.format(time.time() - tic))
46
+ self.dataset = dataset
47
+ self.createIndex()
48
+
49
+ if annotation is not None:
50
+ print('adding annotations into memory...')
51
+ tic = time.time()
52
+ dataset = annotation
53
+ self.dataset = dataset
54
+ self.createIndex()
55
+
56
+ def convert_coco_poly_to_mask(segmentations, height, width):
57
+ masks = []
58
+ for polygons in segmentations:
59
+ rles = coco_mask.frPyObjects(polygons, height, width)
60
+ mask = coco_mask.decode(rles)
61
+ if len(mask.shape) < 3:
62
+ mask = mask[..., None]
63
+ mask = torch.as_tensor(mask, dtype=torch.uint8)
64
+ mask = mask.any(dim=2)
65
+ masks.append(mask)
66
+ if masks:
67
+ masks = torch.stack(masks, dim=0)
68
+ else:
69
+ masks = torch.zeros((0, height, width), dtype=torch.uint8)
70
+ return masks
71
+
72
+ class ConvertCocoPolysToMask(object):
73
+ def __init__(self, return_masks=False):
74
+ self.return_masks = return_masks
75
+
76
+ def __call__(self, image, target):
77
+ w, h = image.size
78
+
79
+ image_id = target["image_id"]
80
+ image_id = torch.tensor([image_id])
81
+
82
+ anno = target["annotations"]
83
+
84
+ boxes = [obj["bbox"] for obj in anno]
85
+ # guard against no boxes via resizing
86
+ boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
87
+ boxes[:, 2:] += boxes[:, :2]
88
+ boxes[:, 0::2].clamp_(min=0, max=w)
89
+ boxes[:, 1::2].clamp_(min=0, max=h)
90
+
91
+ classes = [obj["category_id"] for obj in anno]
92
+ classes = torch.tensor(classes, dtype=torch.int64)
93
+
94
+ if self.return_masks:
95
+ segmentations = [obj["segmentation"] for obj in anno]
96
+ masks = convert_coco_poly_to_mask(segmentations, h, w)
97
+
98
+ keypoints = None
99
+ if anno and "keypoints" in anno[0]:
100
+ keypoints = [obj["keypoints"] for obj in anno]
101
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
102
+ num_keypoints = keypoints.shape[0]
103
+ if num_keypoints:
104
+ keypoints = keypoints.view(num_keypoints, -1, 3)
105
+
106
+ keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
107
+
108
+ # for conversion to coco api
109
+ area = torch.tensor([obj["area"] for obj in anno])
110
+ iscrowd = torch.BoolTensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
111
+ iscrowd |= classes != 0
112
+
113
+ target = {}
114
+ target["boxes"] = boxes[keep]
115
+ target["labels"] = classes[keep]
116
+ if self.return_masks:
117
+ target["masks"] = masks[keep]
118
+ target["image_id"] = image_id
119
+ if keypoints is not None:
120
+ target["keypoints"] = keypoints[keep]
121
+
122
+ target["area"] = area[keep]
123
+ target["iscrowd"] = iscrowd[keep]
124
+
125
+ target["orig_size"] = torch.as_tensor([int(h), int(w)])
126
+ target["size"] = torch.as_tensor([int(h), int(w)])
127
+
128
+ return image, target
129
+
130
+
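# Editor's sketch (not part of the original file): the box handling in
# ConvertCocoPolysToMask.__call__ above converts COCO [x, y, w, h] boxes to
# [x1, y1, x2, y2], clamps them to the image, and keeps only boxes with positive
# width and height. A tiny worked example with made-up boxes:
import torch as _torch

_w, _h = 100, 80                                    # pretend image size
_demo_boxes = _torch.tensor([[10., 20., 30., 40.],  # ordinary box
                             [95., 70., 30., 30.],  # spills outside the image
                             [50., 50., 0., 10.]])  # zero width -> dropped
_demo_boxes[:, 2:] += _demo_boxes[:, :2]            # xywh -> xyxy
_demo_boxes[:, 0::2].clamp_(min=0, max=_w)
_demo_boxes[:, 1::2].clamp_(min=0, max=_h)
_keep = (_demo_boxes[:, 3] > _demo_boxes[:, 1]) & (_demo_boxes[:, 2] > _demo_boxes[:, 0])
# _demo_boxes[_keep] -> [[10, 20, 40, 60], [95, 70, 100, 80]]; the degenerate box is removed.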
131
+ class CocoDetection(data.Dataset):
132
+ """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
133
+
134
+ Args:
135
+ ann (dict): merged COCO-style annotation dict (see ``coco_merge``).
+ phase (string): either ``'train'`` or ``'val'``.
+ transform (callable, optional): A function/transform that takes in a PIL image
138
+ and returns a transformed version. E.g, ``transforms.ToTensor``
139
+ target_transform (callable, optional): A function/transform that takes in the
140
+ target and transforms it.
141
+ """
142
+
143
+ def __init__(self, ann, phase, transform=None, target_transform=None):
144
+ self.coco = PetrelCOCO(annotation=ann)
145
+
146
+ self.ids = list(self.coco.imgs.keys())
147
+ assert phase in ['train', 'val']
148
+ self.transform = transform
149
+ self.phase = phase
150
+ self.target_transform = target_transform
151
+
152
+ self.rank = dist.get_rank()
153
+ self.world_size = dist.get_world_size()
154
+
155
+ self.initialized = True
156
+
157
+ def _init_memcached(self):
158
+ if not self.initialized:
159
+ ## only use mc default
160
+ print("==> will load files from local machine")
161
+ server_list_config_file = "/mnt/lustre/share/memcached_client/server_list.conf"
162
+ client_config_file = "/mnt/lustre/share/memcached_client/client.conf"
163
+ self.memcached_mclient = mc.MemcachedClient.GetInstance(server_list_config_file, client_config_file)
164
+ ## mc-support-ceph
165
+ print('mc-support-ceph')
166
+ self.ceph_mclient = s3client
167
+
168
+ self.initialized = True
169
+
170
+ def _read_one(self, index=None):
171
+ """
172
+ Args:
173
+ index (int): Index
174
+
175
+ Returns:
176
+ tuple: Tuple (image, target, imgname). target is the object returned by ``coco.loadAnns``.
177
+ """
178
+ if index is None:
179
+ index = np.random.randint(len(self.ids))
180
+
181
+ coco = self.coco
182
+ img_id = self.ids[index]
183
+
184
+ ann_ids = coco.getAnnIds(imgIds=img_id)
185
+ target = copy.deepcopy(coco.loadAnns(ann_ids))
186
+
187
+ for one_target in target:
188
+ if 'segmentation' in one_target: del one_target['segmentation']
189
+ if 'keypoints' in one_target: del one_target['keypoints']
190
+
191
+ path = coco.loadImgs(img_id)[0]['file_name']
192
+ img_root = coco.loadImgs(img_id)[0]['img_root']
193
+ imgname = os.path.splitext(path)[0]
194
+
195
+ ## for code in lab, we use jpg (CrowdHuman images are stored as .jpg)
+ if 'CrowdHuman' in img_root:
+ path = path.replace('.png', '.jpg')
201
+ filename = os.path.join(img_root, path)
202
+ try:
203
+ img = Image.open(filename).convert('RGB')
204
+ if img is None:
205
+ raise Exception("None Image")
206
+ except Exception:
207
+ outputName = "failed_to_read_in_train.txt"
208
+ with open(outputName,"a") as g:
209
+ g.write("%s\n"%(filename))
210
+ print('Read image[{}] failed ({})'.format(index, filename))
211
+ ## if reading fails, retry _read_one with a random index
212
+ return self._read_one()
213
+ else:
214
+ output = dict()
215
+ ##set random_seed with img idx
216
+ random.seed(index+self.rank)
217
+ np.random.seed(index+self.rank)
218
+
219
+ if self.transform is not None:
220
+ img = self.transform(img)
221
+
222
+ if self.target_transform is not None:
223
+ target = self.target_transform(target)
224
+
225
+ return img, target, imgname
226
+
227
+ def __getitem__(self, index):
228
+ """
229
+ Args:
230
+ index (int): Index
231
+
232
+ Returns:
233
+ tuple: Tuple (image, target, imgname). target is the object returned by ``coco.loadAnns``.
234
+ """
235
+ self._init_memcached()
236
+ img, target, imgname = self._read_one(index)
237
+
238
+ return img, target, imgname
239
+
240
+ def __len__(self):
241
+ return len(self.ids)
242
+
243
+ def __repr__(self):
244
+ fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
245
+ fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
246
+ fmt_str += ' Root Location: {}\n'.format(self.root)
247
+ tmp = ' Transforms (if any): '
248
+ fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
249
+ tmp = ' Target Transforms (if any): '
250
+ fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
251
+ return fmt_str
252
+
253
+
254
+ def coco_merge(
255
+ img_root_list: List[str], input_list: List[str],
256
+ indent: Optional[int] = None,
257
+ ) -> dict:
258
+ """Merge COCO annotation files.
259
+
260
+ Args:
261
+ img_root_list: Image root directory for each annotation file; stored on every image entry as ``img_root``.
+ input_list: Paths to the COCO annotation json files to be merged.
+ indent: Argument intended for `json.dump` (see https://docs.python.org/3/library/json.html#json.dump); currently unused.
265
+ """
266
+ data_list = []
267
+
268
+ for input in input_list:
269
+ with open(input, 'r') as f:
270
+ data_extend = json.load(f)
271
+
272
+ data_list.append(data_extend)
273
+
274
+ output= {'categories': data_list[0]['categories']}
275
+
276
+ output["images"], output["annotations"] = [], []
277
+
278
+ for i, (data, img_root) in enumerate(zip(data_list, img_root_list)):
279
+ print(
280
+ "Input {}: {} images, {} annotations".format(
281
+ i + 1, len(data["images"]), len(data["annotations"])
282
+ )
283
+ )
284
+
285
+ cat_id_map = {}
286
+ for new_cat in data["categories"]:
287
+ new_id = None
288
+ for output_cat in output["categories"]:
289
+ if new_cat["name"] == output_cat["name"]:
290
+ new_id = output_cat["id"]
291
+ break
292
+
293
+ if new_id is not None:
294
+ cat_id_map[new_cat["id"]] = new_id
295
+ else:
296
+ new_cat_id = max(c["id"] for c in output["categories"]) + 1
297
+ cat_id_map[new_cat["id"]] = new_cat_id
298
+ new_cat["id"] = new_cat_id
299
+ output["categories"].append(new_cat)
300
+
301
+ img_id_map = {}
302
+ for image in data["images"]:
303
+ n_imgs = len(output["images"])
304
+ img_id_map[image["id"]] = n_imgs
305
+ image["id"] = n_imgs
306
+ image["img_root"] = img_root
307
+
308
+ output["images"].append(image)
309
+
310
+ for annotation in data["annotations"]:
311
+ n_anns = len(output["annotations"])
312
+ annotation["id"] = n_anns
313
+ annotation["image_id"] = img_id_map[annotation["image_id"]]
314
+ annotation["category_id"] = cat_id_map[annotation["category_id"]]
315
+
316
+ output["annotations"].append(annotation)
317
+
318
+ print(
319
+ "Result: {} images, {} annotations".format(
320
+ len(output["images"]), len(output["annotations"])
321
+ )
322
+ )
323
+ return output
324
+
325
+
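# Editor's sketch (not part of the original file): coco_merge() above concatenates several
# COCO annotation jsons, re-assigning image/annotation/category ids so they stay unique and
# tagging every image with the img_root it should be loaded from. The paths below are
# hypothetical; the call is guarded so this module stays import-safe.
if False:
    _img_roots = ['/data/CrowdHuman/Images', '/data/CityPersons/leftImg8bit']
    _ann_files = ['/data/CrowdHuman/annotation_train.json',
                  '/data/CityPersons/train.json']
    _merged = coco_merge(_img_roots, _ann_files)
    # _merged['images'][i]['img_root'] records which root each image comes from, and
    # PetrelCOCO(annotation=_merged) then builds the in-memory index without re-reading json.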
326
+ class PedestrainDetectionDataset_v2(CocoDetection):
327
+ def __init__(self, ginfo, augmentation, task_spec, train=True, vit=False,
328
+ num_append_fake_boxes=0,
329
+ # append to 900 for a fixed length gt input in the sparse labeling (label) branch
330
+ return_box_xyxy=False,
331
+ append_z=True,
332
+ test_trainset=False,
333
+ **kwargs):
334
+ img_folder = task_spec['img_folder'] if isinstance(task_spec['img_folder'], list) else [task_spec['img_folder']]
335
+ ann_file = task_spec['ann_file'] if isinstance(task_spec['ann_file'], list) else [task_spec['ann_file']]
336
+ self.root = img_folder
337
+
338
+ ann = coco_merge(img_folder, ann_file)
339
+
340
+ return_masks = task_spec['return_masks']
341
+ phase = 'train' if train else 'val'
342
+
343
+ super(PedestrainDetectionDataset_v2, self).__init__(ann=ann, phase=phase)
344
+
345
+ self.return_box_xyxy = return_box_xyxy
346
+ transforms = PedestrainDetectionAugmentation(phase=phase if not test_trainset else 'val', vit=vit, return_box_xyxy=self.return_box_xyxy,
347
+ max_size=augmentation.get('max_size',1333),)
348
+
349
+ name2wh = {}
350
+ for img_id in self.ids:
351
+ img_name = self.coco.loadImgs(img_id)[0]['file_name'].split('.')[0]
352
+ height = self.coco.loadImgs(img_id)[0]['height']
353
+ width = self.coco.loadImgs(img_id)[0]['width']
354
+ name2wh[img_name]={'width':width, 'height': height}
355
+
356
+ self.flag = np.zeros(len(self.ids), dtype=np.uint8)
357
+ for i, img_id in enumerate(self.ids):
358
+ img_info = self.coco.loadImgs(img_id)[0]['file_name'].split('.')[0]
359
+ if name2wh[img_info]['width'] / name2wh[img_info]['height'] > 1:
360
+ self.flag[i] = 1
361
+
362
+ self._transforms = transforms
363
+ self.phase = phase
364
+ self.prepare = ConvertCocoPolysToMask(return_masks)
365
+ self.task_name = ginfo.task_name
366
+
367
+ self.num_append_fake_boxes = num_append_fake_boxes
368
+ self.append_z = append_z
369
+
370
+ def _filter_ignores(self, target):
371
+ target = list(filter(lambda rb: rb['category_id'] > -1, target))
372
+
373
+ return target
374
+
375
+ def _minus_target_label(self, target, value):
376
+
377
+ results = []
378
+ for t in target:
379
+ t['category_id'] -= value
380
+ results.append(t)
381
+ return results
382
+
383
+ def __getitem__(self, idx):
384
+ dataset_dict = {}
385
+ img, target, imgname = super(PedestrainDetectionDataset_v2, self).__getitem__(idx)
386
+ target = self._minus_target_label(target, 1)
387
+ total = len(target)
388
+ image_id = self.ids[idx]
389
+
390
+ target = {'image_id': image_id, 'annotations': target}
391
+ img, target = self.prepare(img, target)
392
+ image_shape = (img.size[-1], img.size[-2]) # h, w
393
+ self._record_image_size(dataset_dict, img)
394
+
395
+ if self._transforms is not None:
396
+ img, target = self._transforms(img, target)
397
+
398
+ if self.num_append_fake_boxes > 0:
399
+ # not take iscrowded boxes into consideration
400
+ len_target = target['labels'].shape[0]
401
+ len_append = self.num_append_fake_boxes - len_target
402
+ target['boxes'] = torch.cat([target['boxes'], torch.zeros([len_append, 4])], dim=0)
403
+ # the appended label is set to 1(background), as ped det only has one class 0 for pedestrian
404
+ append_label = 1
405
+ target['labels'] = torch.cat([target['labels'], torch.ones([len_append]).long()*append_label], dim=0)
406
+ target['iscrowd'] = torch.cat([target['iscrowd'], torch.ones([len_append]).bool()], dim=0)
407
+ target['area'] = torch.cat([target['area'], torch.zeros([len_append])], dim=0)
408
+
409
+ dataset_dict['orig_size'] = target['orig_size']
410
+ dataset_dict['size'] = target['size']
411
+ del target['image_id']
412
+ del target['orig_size']
413
+ del target['size']
414
+
415
+ instances = Instances(image_shape, **target)
416
+
417
+ # sparse_labeling should have a shape of [xyz, T(temperal)=2, V=num_append_fake_boxes, M(num_peopoe)=1]
418
+ # T=2, as we consider x1y1, x2y2 as two points. Info in two points will be integrated in conv to
419
+ # have a token representing a box.
420
+ # import pdb;
421
+ # pdb.set_trace()
422
+ sparse_labeling = target['boxes'].reshape(target['boxes'].shape[0], 2, 2).contiguous()
423
+ if self.append_z:
424
+ append_z = torch.zeros([target['boxes'].shape[0], 2, 1])
425
+ sparse_labeling = torch.cat([sparse_labeling, append_z], dim=2) # num_append_fake_boxes, T, xyz
426
+ sparse_labeling = sparse_labeling.unsqueeze(-1).permute(2, 1, 0, 3).contiguous()
427
+
428
+ dataset_dict['sparse_labeling'] = sparse_labeling
429
+ dataset_dict["image"] = img
430
+ dataset_dict["image_id"] = image_id
431
+ dataset_dict["label"] = -1
432
+ dataset_dict["instances"] = instances
433
+ dataset_dict["filename"] = imgname
434
+
435
+ return dataset_dict
436
+
437
+ @staticmethod
438
+ def _record_image_size(dataset_dict, image):
439
+ """
440
+ Raise an error if the image does not match the size specified in the dict.
441
+ """
442
+ # To ensure bbox always remap to original image size # when in PIL, reversed.
443
+ if "width" not in dataset_dict:
444
+ dataset_dict["width"] = image.size[1]
445
+ if "height" not in dataset_dict:
446
+ dataset_dict["height"] = image.size[0]
447
+
448
+
449
+ class PedestrainDetectionDataset_v2demo(CocoDetection):
450
+ def __init__(self, ginfo, augmentation, task_spec, train=True, vit=False,
451
+ num_append_fake_boxes=0,
452
+ # append to 900 for a fixed length gt input in the sparse labeling (label) branch
453
+ return_box_xyxy=False,
454
+ append_z=True,
455
+ test_trainset=False,
456
+ demo_dir='/mnt/cache/tangshixiang/wyz_proj/demo_video_unihcpv2/folder0',
457
+ **kwargs):
458
+ img_folder = task_spec['img_folder'] if isinstance(task_spec['img_folder'], list) else [task_spec['img_folder']]
459
+ ann_file = task_spec['ann_file'] if isinstance(task_spec['ann_file'], list) else [task_spec['ann_file']]
460
+ self.root = img_folder
461
+
462
+ ann = coco_merge(img_folder, ann_file)
463
+
464
+ return_masks = task_spec['return_masks']
465
+ phase = 'train' if train else 'val'
466
+
467
+ super(PedestrainDetectionDataset_v2demo, self).__init__(ann=ann, phase=phase)
468
+
469
+ self.return_box_xyxy = return_box_xyxy
470
+ transforms = PedestrainDetectionAugmentation(phase=phase if not test_trainset else 'val', vit=vit, return_box_xyxy=self.return_box_xyxy,
471
+ max_size=augmentation.get('max_size',1333),)
472
+
473
+ name2wh = {}
474
+ for img_id in self.ids:
475
+ img_name = self.coco.loadImgs(img_id)[0]['file_name'].split('.')[0]
476
+ height = self.coco.loadImgs(img_id)[0]['height']
477
+ width = self.coco.loadImgs(img_id)[0]['width']
478
+ name2wh[img_name]={'width':width, 'height': height}
479
+
480
+ self.flag = np.zeros(len(self.ids), dtype=np.uint8)
481
+ for i, img_id in enumerate(self.ids):
482
+ img_info = self.coco.loadImgs(img_id)[0]['file_name'].split('.')[0]
483
+ if name2wh[img_info]['width'] / name2wh[img_info]['height'] > 1:
484
+ self.flag[i] = 1
485
+
486
+ self._transforms = transforms
487
+ self.phase = phase
488
+ self.prepare = ConvertCocoPolysToMask(return_masks)
489
+ self.task_name = ginfo.task_name
490
+
491
+ self.num_append_fake_boxes = num_append_fake_boxes
492
+ self.append_z = append_z
493
+ self.demo_dir = demo_dir
494
+ self.listdir = os.listdir(self.demo_dir)
495
+
496
+ def _filter_ignores(self, target):
497
+ target = list(filter(lambda rb: rb['category_id'] > -1, target))
498
+
499
+ return target
500
+
501
+ def _minus_target_label(self, target, value):
502
+
503
+ results = []
504
+ for t in target:
505
+ t['category_id'] -= value
506
+ results.append(t)
507
+ return results
508
+
509
+ def __len__(self):
510
+ return len(os.listdir(self.demo_dir))
511
+
512
+ def __getitem__(self, idx):
513
+ dataset_dict = {}
514
+ img, target, imgname = super(PedestrainDetectionDataset_v2demo, self).__getitem__(0)
515
+ demo_dir = self.demo_dir
516
+ filename = os.path.join(demo_dir, self.listdir[idx])
517
+ img = Image.open(filename).convert('RGB')
518
+ target = self._minus_target_label(target, 1)
519
+ total = len(target)
520
+ image_id = self.ids[0]
521
+
522
+ target = {'image_id': image_id, 'annotations': target}
523
+ img, target = self.prepare(img, target)
524
+ image_shape = (img.size[-1], img.size[-2]) # h, w
525
+ self._record_image_size(dataset_dict, img)
526
+
527
+ if self._transforms is not None:
528
+ img, target = self._transforms(img, target)
529
+
530
+ if self.num_append_fake_boxes > 0:
531
+ # iscrowd boxes are not taken into consideration here
532
+ len_target = target['labels'].shape[0]
533
+ len_append = self.num_append_fake_boxes - len_target
534
+ target['boxes'] = torch.cat([target['boxes'], torch.zeros([len_append, 4])], dim=0)
535
+ # the appended label is set to 1 (background), since pedestrian detection has a single foreground class (0 = pedestrian)
536
+ append_label = 1
537
+ target['labels'] = torch.cat([target['labels'], torch.ones([len_append]).long()*append_label], dim=0)
538
+ target['iscrowd'] = torch.cat([target['iscrowd'], torch.ones([len_append]).bool()], dim=0)
539
+ target['area'] = torch.cat([target['area'], torch.zeros([len_append])], dim=0)
540
+
541
+ dataset_dict['orig_size'] = target['orig_size']
542
+ dataset_dict['size'] = target['size']
543
+ del target['image_id']
544
+ del target['orig_size']
545
+ del target['size']
546
+
547
+ instances = Instances(image_shape, **target)
548
+
549
+ # sparse_labeling should have a shape of [xyz, T(temporal)=2, V=num_append_fake_boxes, M(num_people)=1].
+ # T=2 because (x1, y1) and (x2, y2) are treated as two points; the information in the two points
+ # is later integrated by a conv layer into a single token representing a box.
554
+ sparse_labeling = target['boxes'].reshape(target['boxes'].shape[0], 2, 2).contiguous()
555
+ if self.append_z:
556
+ append_z = torch.zeros([target['boxes'].shape[0], 2, 1])
557
+ sparse_labeling = torch.cat([sparse_labeling, append_z], dim=2) # num_append_fake_boxes, T, xyz
558
+ sparse_labeling = sparse_labeling.unsqueeze(-1).permute(2, 1, 0, 3).contiguous()
559
+
560
+ dataset_dict['sparse_labeling'] = sparse_labeling
561
+ dataset_dict["image"] = img
562
+ dataset_dict["image_id"] = image_id
563
+ dataset_dict["label"] = -1
564
+ dataset_dict["instances"] = instances
565
+ dataset_dict["filename"] = filename
566
+
567
+ return dataset_dict
568
+
569
+ @staticmethod
570
+ def _record_image_size(dataset_dict, image):
571
+ """
572
+ Record the image size into the dataset dict if "width"/"height" are not already present.
573
+ """
574
+ # To ensure boxes can always be remapped to the original image size. Note that PIL's Image.size is (width, height), hence the reversed indexing below.
575
+ if "width" not in dataset_dict:
576
+ dataset_dict["width"] = image.size[1]
577
+ if "height" not in dataset_dict:
578
+ dataset_dict["height"] = image.size[0]
core/data/datasets/images/pos_dataset_dev.py ADDED
@@ -0,0 +1,713 @@
1
+ import copy
+
+ from torch.utils.data import Dataset
+ from pathlib import Path
+
+ from abc import ABCMeta, abstractmethod
+ import numpy as np
+ import os
+ import os.path as osp
+ import cv2
+ import time
+ import random
+ import torch
+ import warnings
+ from collections import OrderedDict, defaultdict
+ from core.data.transforms.pose_transforms import *
+ import json  # originally: import json_tricks as json
+ from xtcocotools.coco import COCO
+ from xtcocotools.cocoeval import COCOeval
+ import torch.distributed as dist
+
+
+ from core.utils import sync_print
27
+
28
+
29
+ class PetrelCOCO(COCO):
30
+ def __init__(self, annotation_file=None, test_index=None, ann_data=None):
31
+ """
32
+ Constructor of Microsoft COCO helper class for reading and visualizing annotations.
33
+ :param annotation_file (str): location of annotation file
34
+ :param image_folder (str): location to the folder that hosts images.
35
+ :return:
36
+ """
37
+ self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
38
+ self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
39
+ self.anno_file = [annotation_file]
40
+ self.test_index = test_index
41
+ if annotation_file is not None:
42
+ print('loading annotations into memory...')
43
+ tic = time.time()
44
+ # https://github.com/cocodataset/cocoapi/pull/453/
45
+ if ann_data is None:
46
+ with open(annotation_file, 'r') as f:
47
+ dataset = json.load(f)
48
+ else:
49
+ dataset = ann_data
50
+ assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
51
+ print('Done (t={:0.2f}s)'.format(time.time()- tic))
52
+ self.dataset = dataset
53
+ self.createIndex()
54
+ if 'annotations' in self.dataset:
55
+ for i in range(len(self.dataset['annotations'])):
56
+ if self.test_index is not None:
57
+ keypoints = np.array(self.dataset['annotations'][i]['keypoints']).reshape([-1, 3])
58
+ keypoints = keypoints[self.test_index, :]
59
+ self.dataset['annotations'][i]['keypoints'] = keypoints.reshape([-1]).tolist()
60
+ if 'iscrowd' not in self.dataset['annotations'][i]:
61
+ self.dataset['annotations'][i]['iscrowd'] = False
62
+
63
+
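A brief usage sketch for PetrelCOCO: it behaves like xtcocotools' COCO, but can also be built from an annotation dict that is already in memory via ann_data, skipping the file read. The annotation path below is a placeholder.

import json
from core.data.datasets.images.pos_dataset_dev import PetrelCOCO

ann_file = 'annotations/person_keypoints_val2017.json'   # placeholder path

# Let PetrelCOCO read the file itself ...
coco = PetrelCOCO(ann_file)

# ... or hand it a pre-loaded annotation dict (e.g. fetched from a remote store);
# ann_file is still passed so that the "annotation_file is not None" branch runs.
with open(ann_file, 'r') as f:
    ann_data = json.load(f)
coco_in_memory = PetrelCOCO(ann_file, ann_data=ann_data)
print(len(coco_in_memory.getImgIds()))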
64
+ class COCOPosDatasetDev(Dataset):
65
+ """CocoDataset dataset for top-down pose estimation.
66
+
67
+ "Microsoft COCO: Common Objects in Context", ECCV'2014.
68
+ More details can be found in the `paper
69
+ <https://arxiv.org/abs/1405.0312>`__ .
70
+
71
+ The dataset loads raw features and apply specified transforms
72
+ to return a dict containing the image tensors and other information.
73
+
74
+ COCO keypoint indexes::
75
+
76
+ 0: 'nose',
77
+ 1: 'left_eye',
78
+ 2: 'right_eye',
79
+ 3: 'left_ear',
80
+ 4: 'right_ear',
81
+ 5: 'left_shoulder',
82
+ 6: 'right_shoulder',
83
+ 7: 'left_elbow',
84
+ 8: 'right_elbow',
85
+ 9: 'left_wrist',
86
+ 10: 'right_wrist',
87
+ 11: 'left_hip',
88
+ 12: 'right_hip',
89
+ 13: 'left_knee',
90
+ 14: 'right_knee',
91
+ 15: 'left_ankle',
92
+ 16: 'right_ankle'
93
+
94
+ Args:
95
+ ann_file (str): Path to the annotation file.
96
+ img_prefix (str): Path to a directory where images are held.
97
+ Default: None.
98
+ data_cfg (dict): config
99
+ pipeline (list[dict | callable]): A sequence of data transforms.
100
+ dataset_info (DatasetInfo): A class containing all dataset info.
101
+ test_mode (bool): Store True when building test or
102
+ validation dataset. Default: False.
103
+ """
104
+ def __init__(self,
105
+ ginfo,
106
+ ann_file,
107
+ img_prefix,
108
+ data_cfg,
109
+ test_mode=False,
110
+ use_udp=False,
111
+ use_ceph=False,
112
+ data_use_ratio=1,
113
+ **kwargs):
114
+ self.image_info = {}
115
+ self.ann_info = {}
116
+ self.initialized = False
117
+
118
+ self.use_ceph = True
119
+ self.annotations_path = ann_file
120
+ self.img_prefix = img_prefix
121
+ self.test_mode = test_mode
122
+ print('data_cfg0', data_cfg)
123
+ # data_cfg=demjson.decode(data_cfg)
124
+ # print('data_cfg',data_cfg)
125
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
126
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
127
+ self.ann_info['num_joints'] = data_cfg['num_joints']
128
+
129
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
130
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
131
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
132
+
133
+ self.db = []
134
+ self.task_name = ginfo.task_name
135
+
136
+ if test_mode:
137
+ pipeline = [
138
+ LoadImageFromFile(use_ceph=use_ceph),
139
+ TopDownAffine(use_udp=use_udp),
140
+ ToUNTensor(),
141
+ Collect(keys=['image'],
142
+ meta_keys=['image_file', 'center', 'bbox', 'scale', 'rotation', 'bbox_score', 'flip_pairs'])
143
+ ]
144
+ else:
145
+ pipeline = [
146
+ LoadImageFromFile(use_ceph=use_ceph),
147
+ TopDownRandomFlip(flip_prob=0.5),
148
+ TopDownHalfBodyTransform(num_joints_half_body=8,prob_half_body=0.3),
149
+ TopDownGetRandomScaleRotation(rot_factor=40, scale_factor=0.5),
150
+ TopDownAffine(use_udp=use_udp),
151
+ ToUNTensor(),
152
+ TopDownGenerateTarget(sigma=2, encoding='UDP' if use_udp else 'MSRA'),
153
+ Collect(keys=['image', 'label', 'target_weight'],
154
+ meta_keys=['image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale','rotation',
155
+ 'bbox_score', 'flip_pairs'])
156
+ ]
157
+
158
+ self.pipeline = ComposeX(pipeline)
159
+ self.use_gt_bbox = data_cfg['use_gt_bbox']
160
+ self.bbox_file = data_cfg['bbox_file'] if data_cfg['bbox_file'].startswith('/mnt') else (Path(__file__).parent / 'resources' / data_cfg['bbox_file']).resolve()
161
+ self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
162
+ if 'image_thr' in data_cfg:
163
+ warnings.warn(
164
+ 'image_thr is deprecated, '
165
+ 'please use det_bbox_thr instead', DeprecationWarning)
166
+ self.det_bbox_thr = data_cfg['image_thr']
167
+
168
+
169
+ self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
170
+ [11, 12], [13, 14], [15, 16]]
171
+
172
+ self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
173
+ self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)
174
+
175
+ self.ann_info['use_different_joint_weights'] = False
176
+ self.ann_info['joint_weights'] = np.array(
177
+ [
178
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
179
+ 1.2, 1.5, 1.5
180
+ ],
181
+ dtype=np.float32).reshape((self.ann_info['num_joints'], 1))
182
+
183
+ # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/'
184
+ # 'pycocotools/cocoeval.py#L523'
185
+
186
+ self.coco = PetrelCOCO(ann_file)
187
+
188
+ cats = [
189
+ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
190
+ ]
191
+ self.classes = ['__background__'] + cats
192
+ self.num_classes = len(self.classes)
193
+ self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
194
+ self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
195
+ self._coco_ind_to_class_ind = dict(
196
+ (self._class_to_coco_ind[cls], self._class_to_ind[cls])
197
+ for cls in self.classes[1:])
198
+ self.img_ids = self.coco.getImgIds()
199
+ self.num_images = len(self.img_ids)
200
+ self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
201
+ self.dataset_name = 'coco'
202
+
203
+ self.db = self._get_db()
204
+ if data_use_ratio != 1:
205
+ self.db = random.sample(self.db, int(len(self.db) * data_use_ratio))
206
+
207
+ print(f'=> COCOPosDatasetDev num_images: {self.num_images}')
208
+ print(f'=> COCOPosDatasetDev load {len(self.db)} samples')
209
+
210
+ @staticmethod
211
+ def _get_mapping_id_name(imgs):
212
+ """
213
+ Args:
214
+ imgs (dict): dict of image info.
215
+
216
+ Returns:
217
+ tuple: Image name & id mapping dicts.
218
+
219
+ - id2name (dict): Mapping image id to name.
220
+ - name2id (dict): Mapping image name to id.
221
+ """
222
+ id2name = {}
223
+ name2id = {}
224
+ for image_id, image in imgs.items():
225
+ file_name = image['file_name']
226
+ id2name[image_id] = file_name
227
+ name2id[file_name] = image_id
228
+
229
+ return id2name, name2id
230
+
231
+ def _get_db(self):
232
+ """Load dataset."""
233
+ if (not self.test_mode) or self.use_gt_bbox:
234
+ # use ground truth bbox
235
+ gt_db = self._load_coco_keypoint_annotations()
236
+ else:
237
+ # use bbox from detection
238
+ gt_db = self._load_coco_person_detection_results()
239
+ return gt_db
240
+
241
+ def _load_coco_keypoint_annotations(self):
242
+ """Ground truth bbox and keypoints."""
243
+ gt_db = []
244
+ for img_id in self.img_ids:
245
+ gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
246
+ return gt_db
247
+
248
+ def _load_coco_keypoint_annotation_kernel(self, img_id):
249
+ """load annotation from COCOAPI.
250
+
251
+ Note:
252
+ bbox:[x1, y1, w, h]
253
+ Args:
254
+ img_id: coco image id
255
+ Returns:
256
+ dict: db entry
257
+ """
258
+ img_ann = self.coco.loadImgs(img_id)[0]
259
+ width = img_ann['width']
260
+ height = img_ann['height']
261
+ num_joints = self.ann_info['num_joints']
262
+
263
+ ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
264
+ objs = self.coco.loadAnns(ann_ids)
265
+
266
+ # sanitize bboxes
267
+ valid_objs = []
268
+ for obj in objs:
269
+ if 'bbox' not in obj:
270
+ continue
271
+ x, y, w, h = obj['bbox']
272
+ x1 = max(0, x)
273
+ y1 = max(0, y)
274
+ x2 = min(width - 1, x1 + max(0, w - 1))
275
+ y2 = min(height - 1, y1 + max(0, h - 1))
276
+ if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
277
+ obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
278
+ valid_objs.append(obj)
279
+ objs = valid_objs
280
+
281
+ bbox_id = 0
282
+ rec = []
283
+ for obj in objs:
284
+ if 'keypoints' not in obj:
285
+ continue
286
+ if max(obj['keypoints']) == 0:
287
+ continue
288
+ if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
289
+ continue
290
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
291
+ joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
292
+
293
+ keypoints = np.array(obj['keypoints']).reshape(-1, 3)
294
+ joints_3d[:, :2] = keypoints[:, :2]
295
+ joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
296
+
297
+ center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
298
+
299
+ image_file = os.path.join(self.img_prefix, self.id2name[img_id])
300
+ rec.append({
301
+ 'image_file': image_file,
302
+ 'center': center,
303
+ 'scale': scale,
304
+ 'bbox': obj['clean_bbox'][:4],
305
+ 'rotation': 0,
306
+ 'joints_3d': joints_3d,
307
+ 'joints_3d_visible': joints_3d_visible,
308
+ 'dataset': self.dataset_name,
309
+ 'bbox_score': 1,
310
+ 'bbox_id': bbox_id
311
+ })
312
+ bbox_id = bbox_id + 1
313
+
314
+ return rec
315
+
316
+ def _xywh2cs(self, x, y, w, h):
317
+ """This encodes bbox(x,y,w,w) into (center, scale)
318
+
319
+ Args:
320
+ x, y, w, h
321
+
322
+ Returns:
323
+ tuple: A tuple containing center and scale.
324
+
325
+ - center (np.ndarray[float32](2,)): center of the bbox (x, y).
326
+ - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
327
+ """
328
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
329
+ 'image_size'][1]
330
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
331
+
332
+ if (not self.test_mode) and np.random.rand() < 0.3:
333
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
334
+
335
+ if w > aspect_ratio * h:
336
+ h = w * 1.0 / aspect_ratio
337
+ elif w < aspect_ratio * h:
338
+ w = h * aspect_ratio
339
+
340
+ # pixel std is 200.0
341
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
342
+ # padding to include proper amount of context
343
+ scale = scale * 1.25
344
+
345
+ return center, scale
346
+
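A standalone version of the center/scale encoding above (without the random center shift applied during training), together with a small worked example; the helper name is only for illustration.

import numpy as np

def xywh2cs(x, y, w, h, image_size=(192, 256), padding=1.25):
    aspect_ratio = image_size[0] / image_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) * padding   # pixel std is 200
    return center, scale

# A 100x80 box at (50, 60) with a 192x256 input:
# center = [100. 100.], scale = [0.625 0.8333] (box widened to the 3:4 aspect ratio, then padded by 1.25)
print(xywh2cs(50, 60, 100, 80))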
347
+ def _load_coco_person_detection_results(self):
348
+ """Load coco person detection results."""
349
+ num_joints = self.ann_info['num_joints']
350
+
351
+ with open(self.bbox_file, 'r') as f:
352
+ all_boxes = json.load(f)
353
+
354
+ if not all_boxes:
355
+ raise ValueError('=> Load %s fail!' % self.bbox_file)
356
+
357
+ print(f'=> Total boxes: {len(all_boxes)}')
358
+
359
+ kpt_db = []
360
+ bbox_id = 0
361
+ for det_res in all_boxes:
362
+ if det_res['category_id'] != 1:
363
+ continue
364
+
365
+ image_file = os.path.join(self.img_prefix,
366
+ self.id2name[det_res['image_id']])
367
+ box = det_res['bbox']
368
+ score = det_res['score']
369
+
370
+ if score < self.det_bbox_thr:
371
+ continue
372
+
373
+ center, scale = self._xywh2cs(*box[:4])
374
+ joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
375
+ joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
376
+ kpt_db.append({
377
+ 'image_file': image_file,
378
+ 'center': center,
379
+ 'scale': scale,
380
+ 'rotation': 0,
381
+ 'bbox': box[:4],
382
+ 'bbox_score': score,
383
+ 'dataset': self.dataset_name,
384
+ 'joints_3d': joints_3d,
385
+ 'joints_3d_visible': joints_3d_visible,
386
+ 'bbox_id': bbox_id
387
+ })
388
+ bbox_id = bbox_id + 1
389
+ print(f'=> Total boxes after filter '
390
+ f'low score@{self.det_bbox_thr}: {bbox_id}')
391
+ return kpt_db
392
+
393
+ def __len__(self):
394
+ """Get the size of the dataset."""
395
+ return len(self.db)
396
+
397
+ def __getitem__(self, idx):
398
+ """Get the sample given index."""
399
+ results = copy.deepcopy(self.db[idx])
400
+ results['ann_info'] = self.ann_info
401
+ out = self.pipeline(results)
402
+ C = self.num_classes - 1 # delete the background class
403
+ if 'label' in out:
404
+ out['dense_labeling'] = np.resize(out['label'], (C, self.ann_info['image_size'][0], self.ann_info['image_size'][1]))
405
+ else:
406
+ out['dense_labeling'] = np.zeros((C, self.ann_info['image_size'][0], self.ann_info['image_size'][1]))
407
+ # del out['ann_info']
408
+ return out # dict_keys(['image_file', 'center', 'scale', 'bbox', 'rotation', 'joints_3d', 'joints_3d_visible',
409
+ # 'dataset', 'bbox_score', 'bbox_id', 'ann_info', 'image', 'flipped', 'label',
410
+ # 'target_weight'])
411
+
412
+
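As a hedged construction example, the fields below cover what the COCOPosDatasetDev constructor reads from data_cfg and ginfo. The concrete values and the ann_file/img_prefix paths are assumptions; only num_joints=17 is fixed by the COCO keypoint definition, and a bare bbox_file name is resolved under the resources/ folder by the class itself.

from types import SimpleNamespace

data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[48, 64],
    num_joints=17,
    num_output_channels=17,
    dataset_channel=list(range(17)),
    inference_channel=list(range(17)),
    use_gt_bbox=True,
    bbox_file='COCO_val2017_detections_AP_H_56_person.json',
    det_bbox_thr=0.0,
)
ginfo = SimpleNamespace(task_name='coco_pose')   # only .task_name is accessed

dataset = COCOPosDatasetDev(ginfo,
                            ann_file='annotations/person_keypoints_val2017.json',  # placeholder
                            img_prefix='data/coco/val2017/',                       # placeholder
                            data_cfg=data_cfg,
                            test_mode=True)
sample = dataset[0]   # dict with 'image' plus the meta keys collected by the test pipeline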
413
+ class MPIIPosDatasetDev(Dataset):
414
+ def __init__(self,
415
+ ginfo,
416
+ ann_file,
417
+ img_prefix,
418
+ data_cfg,
419
+ test_mode=False,
420
+ use_udp=False,
421
+ data_use_ratio=1,
422
+ **kwargs):
423
+
424
+ self.image_info = {}
425
+ self.ann_info = {}
426
+
427
+ self.ann_file = ann_file
428
+ self.img_prefix = img_prefix
429
+
430
+ self.test_mode = test_mode
431
+
432
+ self.ann_info['image_size'] = np.array(data_cfg['image_size'])
433
+ self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
434
+ self.ann_info['num_joints'] = data_cfg['num_joints']
435
+
436
+ self.ann_info['inference_channel'] = data_cfg['inference_channel']
437
+ self.ann_info['num_output_channels'] = data_cfg['num_output_channels']
438
+ self.ann_info['dataset_channel'] = data_cfg['dataset_channel']
439
+
440
+ self.ann_info['use_different_joint_weights'] = data_cfg.get(
441
+ 'use_different_joint_weights', False)
442
+
443
+ assert self.ann_info['num_joints'] == 16
444
+ self.ann_info['flip_pairs'] = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]]
445
+ self.ann_info['flip_index'] = [5, 4, 3, 2, 1, 0, 6, 7, 8, 9, 15, 14, 13, 12, 11, 10]
446
+ self.ann_info['upper_body_ids'] = [7, 8, 9, 10, 11, 12, 13, 14, 15]
447
+ self.ann_info['lower_body_ids'] = [0, 1, 2, 3, 4, 5, 6]
448
+ self.ann_info['joint_weights'] = np.array([
449
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
450
+ ])
451
+ self.ann_info['skeleton'] = [[0, 1], [1, 2], [2, 6], [6, 3], [3, 4], [4, 5], [6, 7],
452
+ [7, 8], [8, 9], [8, 12], [12, 11], [11, 10], [8, 13], [13, 14], [14, 15]]
453
+ self.sigmas = np.array(
454
+ [0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026, 0.062, 0.072, 0.179, 0.179, 0.072,
455
+ 0.062])
456
+ self.dataset_name = 'mpii'
457
+
458
+ self.db = self._get_db()
459
+ if data_use_ratio != 1:
460
+ self.db = random.sample(self.db, int(len(self.db) * data_use_ratio))
461
+
462
+ self.image_set = set(x['image_file'] for x in self.db)
463
+ self.num_images = len(self.image_set)
464
+
465
+ print(f'=> num_images: {self.num_images}')
466
+ print(f'=> load {len(self.db)} samples')
467
+
468
+ if test_mode:
469
+ pipeline = [
470
+ LoadImageFromFile(),
471
+ TopDownAffine(use_udp=use_udp),
472
+ ToUNTensor(),
473
+ Collect(keys=['image'],
474
+ meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs'])
475
+ ]
476
+ else:
477
+ pipeline = [
478
+ LoadImageFromFile(),
479
+ TopDownRandomFlip(flip_prob=0.5),
480
+ TopDownGetRandomScaleRotation(rot_factor=40, scale_factor=0.5),
481
+ TopDownAffine(use_udp=use_udp),
482
+ ToUNTensor(),
483
+ TopDownGenerateTarget(sigma=2, encoding='UDP' if use_udp else 'MSRA'),
484
+ Collect(keys=['image', 'label', 'target_weight'],
485
+ meta_keys=['image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 'rotation',
486
+ 'flip_pairs'])
487
+ ]
488
+ self.pipeline = ComposeX(pipeline)
489
+
490
+ self.task_name = ginfo.task_name
491
+
492
+ @staticmethod
493
+ def _get_mapping_id_name(imgs):
494
+ """
495
+ Args:
496
+ imgs (dict): dict of image info.
497
+ Returns:
498
+ tuple: Image name & id mapping dicts.
499
+ - id2name (dict): Mapping image id to name.
500
+ - name2id (dict): Mapping image name to id.
501
+ """
502
+ id2name = {}
503
+ name2id = {}
504
+ for image_id, image in imgs.items():
505
+ file_name = image['file_name']
506
+ id2name[image_id] = file_name
507
+ name2id[file_name] = image_id
508
+
509
+ return id2name, name2id
510
+
511
+ def _xywh2cs(self, x, y, w, h, padding=1.25):
512
+ """This encodes bbox(x,y,w,h) into (center, scale)
513
+ Args:
514
+ x, y, w, h (float): left, top, width and height
515
+ padding (float): bounding box padding factor
516
+ Returns:
517
+ center (np.ndarray[float32](2,)): center of the bbox (x, y).
518
+ scale (np.ndarray[float32](2,)): scale of the bbox w & h.
519
+ """
520
+ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
521
+ 'image_size'][1]
522
+ center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
523
+
524
+ if (not self.test_mode) and np.random.rand() < 0.3:
525
+ center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]
526
+
527
+ if w > aspect_ratio * h:
528
+ h = w * 1.0 / aspect_ratio
529
+ elif w < aspect_ratio * h:
530
+ w = h * aspect_ratio
531
+
532
+ # pixel std is 200.0
533
+ scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
534
+ # padding to include proper amount of context
535
+ scale = scale * padding
536
+
537
+ return center, scale
538
+
539
+ def _get_normalize_factor(self, gts, *args, **kwargs):
540
+ """Get the normalize factor. generally inter-ocular distance measured
541
+ as the Euclidean distance between the outer corners of the eyes is
542
+ used. This function should be overridden to measure NME.
543
+ Args:
544
+ gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
545
+ Returns:
546
+ np.ndarray[N, 2]: normalized factor
547
+ """
548
+ return np.ones([gts.shape[0], 2], dtype=np.float32)
549
+
550
+ def _get_db(self):
551
+ """Load dataset."""
552
+ # create train/val split
553
+ with open(self.ann_file, 'r') as f:
554
+ anno = json.load(f)
555
+
556
+ gt_db = []
557
+ bbox_id = 0
558
+ for a in anno:
559
+ image_name = a['image']
560
+
561
+ center = np.array(a['center'], dtype=np.float32)
562
+ scale = np.array([a['scale'], a['scale']], dtype=np.float32)
563
+
564
+ # Adjust center/scale slightly to avoid cropping limbs
565
+ if center[0] != -1:
566
+ center[1] = center[1] + 15 * scale[1]
567
+ # padding to include proper amount of context
568
+ scale = scale * 1.25
569
+
570
+ # MPII uses matlab format, index is 1-based,
571
+ # we should first convert to 0-based index
572
+ center = center - 1
573
+
574
+ joints_3d = np.zeros((self.ann_info['num_joints'], 3),
575
+ dtype=np.float32)
576
+ joints_3d_visible = np.zeros((self.ann_info['num_joints'], 3),
577
+ dtype=np.float32)
578
+ if not self.test_mode:
579
+ joints = np.array(a['joints'])
580
+ joints_vis = np.array(a['joints_vis'])
581
+ assert len(joints) == self.ann_info['num_joints'], \
582
+ f'joint num diff: {len(joints)}' + \
583
+ f' vs {self.ann_info["num_joints"]}'
584
+
585
+ joints_3d[:, 0:2] = joints[:, 0:2] - 1
586
+ joints_3d_visible[:, :2] = joints_vis[:, None]
587
+ image_file = osp.join(self.img_prefix, image_name)
588
+ gt_db.append({
589
+ 'image_file': image_file,
590
+ 'bbox_id': bbox_id,
591
+ 'center': center,
592
+ 'scale': scale,
593
+ 'rotation': 0,
594
+ 'joints_3d': joints_3d,
595
+ 'joints_3d_visible': joints_3d_visible,
596
+ 'dataset': self.dataset_name,
597
+ 'bbox_score': 1
598
+ })
599
+ bbox_id = bbox_id + 1
600
+ gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
601
+
602
+ return gt_db
603
+
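To make the ground-truth preprocessing above concrete, here is a small worked example of the MPII center/scale adjustment (the numbers are illustrative):

import numpy as np

center = np.array([594.0, 257.0], dtype=np.float32)   # MPII center, 1-based (matlab) indexing
scale = np.array([3.021, 3.021], dtype=np.float32)    # in units of 200 pixels

center[1] = center[1] + 15 * scale[1]   # shift downward to avoid cropping limbs
scale = scale * 1.25                    # extra context padding
center = center - 1                     # matlab 1-based -> 0-based index
print(center, scale)                    # [593. 301.315] [3.77625 3.77625]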
604
+ @staticmethod
605
+ def _write_keypoint_results(keypoints, res_file):
606
+ """Write results into a json file."""
607
+
608
+ with open(res_file, 'w') as f:
609
+ json.dump(keypoints, f, sort_keys=True, indent=4)
610
+
611
+ def _report_metric(self,
612
+ res_file,
613
+ metrics,
614
+ pck_thr=0.2,
615
+ pckh_thr=0.7,
616
+ auc_nor=30):
617
+ """Keypoint evaluation.
618
+ Args:
619
+ res_file (str): Json file stored prediction results.
620
+ metrics (str | list[str]): Metric to be performed.
621
+ Options: 'PCK', 'PCKh', 'AUC', 'EPE', 'NME'.
622
+ pck_thr (float): PCK threshold, default as 0.2.
623
+ pckh_thr (float): PCKh threshold, default as 0.7.
624
+ auc_nor (float): AUC normalization factor, default as 30 pixel.
625
+ Returns:
626
+ List: Evaluation results for evaluation metric.
627
+ """
628
+ info_str = []
629
+
630
+ with open(res_file, 'r') as fin:
631
+ preds = json.load(fin)
632
+ assert len(preds) == len(self.db)
633
+
634
+ outputs = []
635
+ gts = []
636
+ masks = []
637
+ box_sizes = []
638
+ threshold_bbox = []
639
+ threshold_head_box = []
640
+
641
+ for pred, item in zip(preds, self.db):
642
+ outputs.append(np.array(pred['keypoints'])[:, :-1])
643
+ gts.append(np.array(item['joints_3d'])[:, :-1])
644
+ masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
645
+ if 'PCK' in metrics:
646
+ bbox = np.array(item['bbox'])
647
+ bbox_thr = np.max(bbox[2:])
648
+ threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
649
+ if 'PCKh' in metrics:
650
+ head_box_thr = item['head_size']
651
+ threshold_head_box.append(
652
+ np.array([head_box_thr, head_box_thr]))
653
+ box_sizes.append(item.get('box_size', 1))
654
+
655
+ outputs = np.array(outputs)
656
+ gts = np.array(gts)
657
+ masks = np.array(masks)
658
+ threshold_bbox = np.array(threshold_bbox)
659
+ threshold_head_box = np.array(threshold_head_box)
660
+ box_sizes = np.array(box_sizes).reshape([-1, 1])
661
+
662
+ if 'PCK' in metrics:
663
+ _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
664
+ threshold_bbox)
665
+ info_str.append(('PCK', pck))
666
+
667
+ if 'PCKh' in metrics:
668
+ _, pckh, _ = keypoint_pck_accuracy(outputs, gts, masks, pckh_thr,
669
+ threshold_head_box)
670
+ info_str.append(('PCKh', pckh))
671
+
672
+ if 'AUC' in metrics:
673
+ info_str.append(('AUC', keypoint_auc(outputs, gts, masks,
674
+ auc_nor)))
675
+
676
+ if 'EPE' in metrics:
677
+ info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
678
+
679
+ if 'NME' in metrics:
680
+ normalize_factor = self._get_normalize_factor(
681
+ gts=gts, box_sizes=box_sizes)
682
+ info_str.append(
683
+ ('NME', keypoint_nme(outputs, gts, masks, normalize_factor)))
684
+
685
+ return info_str
686
+
687
+ def __len__(self):
688
+ """Get the size of the dataset."""
689
+ return len(self.db)
690
+
691
+ def __getitem__(self, idx):
692
+ """Get the sample given index."""
693
+ results = copy.deepcopy(self.db[idx])
694
+ results['ann_info'] = self.ann_info
695
+ out = self.pipeline(results)
696
+ C = self.ann_info['num_joints']
697
+ if 'label' in out:
698
+ out['dense_labeling'] = np.resize(out['label'],
699
+ (C, self.ann_info['image_size'][1], self.ann_info['image_size'][0]))
700
+ else:
701
+ out['dense_labeling'] = np.zeros((C, self.ann_info['image_size'][1], self.ann_info['image_size'][0]))
702
+ # import pdb;pdb.set_trace()
703
+ return out
704
+
705
+ def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
706
+ """sort kpts and remove the repeated ones."""
707
+ kpts = sorted(kpts, key=lambda x: x[key])
708
+ num = len(kpts)
709
+ for i in range(num - 1, 0, -1):
710
+ if kpts[i][key] == kpts[i - 1][key]:
711
+ del kpts[i]
712
+
713
+ return kpts
core/data/datasets/images/resources/CHval.odgt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f9c1b0cb455d6b0fb53b73d1dd92fbb3b3b02bfd897661ceddc246e4991b5e
3
+ size 19994003
core/data/datasets/images/resources/COCO_val2017_detections_AP_H_56_person.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53ba0ad8d0fd461c5a000cd90797fa8c39cd8c38cd125125c0412626ff592d59
3
+ size 16383781
core/data/datasets/images/resources/mpii_gt_val.mat ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab6874f858046c74acdd1b9dacb8746a2ddddc331952487a9774e3ee0c2b075
3
+ size 1257356
core/data/datasets/images/resources/test_caltech_heavy_1xnew.odgt ADDED
The diff for this file is too large to render. See raw diff
 
core/data/datasets/images/seg_data_tools/__init__.py ADDED
File without changes
core/data/datasets/images/seg_data_tools/collate.py ADDED
@@ -0,0 +1,143 @@
1
+ import random
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torch.utils.data.dataloader import default_collate
6
+
7
+ from lib.extensions.parallel.data_container import DataContainer
8
+
9
+
10
+ def stack(batch, data_key=None, return_dc=False):
11
+ if isinstance(batch[0][data_key], DataContainer):
12
+ if batch[0][data_key].stack:
13
+ assert isinstance(batch[0][data_key].data, torch.Tensor)
14
+ samples = [sample[data_key].data for sample in batch]
15
+ return default_collate(samples)
16
+
17
+ elif not return_dc:
18
+ return [sample[data_key].data for sample in batch]
19
+
20
+ else:
21
+ return DataContainer([sample[data_key].data for sample in batch])
22
+
23
+ else:
24
+ return default_collate([sample[data_key] for sample in batch])
25
+
26
+
27
+ def collate(batch, trans_dict):
28
+ data_keys = batch[0].keys()
29
+
30
+ target_width, target_height = trans_dict['input_size']
31
+ target_widths, target_heights = [target_width] * len(batch), [target_height] * len(batch)
32
+
33
+
34
+ for i in range(len(batch)):
35
+ target_width, target_height = target_widths[i], target_heights[i]
36
+
37
+ if 'meta' in data_keys:
38
+ batch[i]['meta'].data['input_size'] = [target_width, target_height]
39
+
40
+ channels, height, width = batch[i]['img'].size()
41
+ if height == target_height and width == target_width:
42
+ continue
43
+
44
+ scaled_size = [width, height]
45
+
46
+ if trans_dict['align_method'] in ['only_scale', 'scale_and_pad']:
47
+ w_scale_ratio = target_width / width
48
+ h_scale_ratio = target_height / height
49
+ if trans_dict['align_method'] == 'scale_and_pad':
50
+ w_scale_ratio = min(w_scale_ratio, h_scale_ratio)
51
+ h_scale_ratio = w_scale_ratio
52
+
53
+ scaled_size = (int(round(width * w_scale_ratio)), int(round(height * h_scale_ratio)))
54
+ if 'meta' in data_keys and 'border_size' in batch[i]['meta'].data:
55
+ batch[i]['meta'].data['border_size'] = scaled_size
56
+
57
+ scaled_size_hw = (scaled_size[1], scaled_size[0])
58
+ batch[i]['img'] = DataContainer(F.interpolate(batch[i]['img'].data.unsqueeze(0),
59
+ scaled_size_hw, mode='bilinear', align_corners=True).squeeze(0), stack=True)
60
+ if 'labelmap' in data_keys:
61
+ labelmap = batch[i]['labelmap'].data.unsqueeze(0).unsqueeze(0).float()
62
+ labelmap = F.interpolate(labelmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
63
+ batch[i]['labelmap'] = DataContainer(labelmap, stack=True)
64
+
65
+ if 'maskmap' in data_keys:
66
+ maskmap = batch[i]['maskmap'].data.unsqueeze(0).unsqueeze(0).float()
67
+ maskmap = F.interpolate(maskmap, scaled_size_hw, mode='nearest').long().squeeze(0).squeeze(0)
68
+ batch[i]['maskmap'].data = DataContainer(maskmap, stack=True)
69
+
70
+ pad_width = target_width - scaled_size[0]
71
+ pad_height = target_height - scaled_size[1]
72
+ assert pad_height >= 0 and pad_width >= 0
73
+ if pad_width > 0 or pad_height > 0:
74
+ assert trans_dict['align_method'] in ['only_pad', 'scale_and_pad']
75
+ left_pad = 0
76
+ up_pad = 0
77
+ if 'pad_mode' not in trans_dict or trans_dict['pad_mode'] == 'random':
78
+ left_pad = random.randint(0, pad_width) # pad_left
79
+ up_pad = random.randint(0, pad_height) # pad_up
80
+
81
+ elif trans_dict['pad_mode'] == 'pad_left_up':
82
+ left_pad = pad_width
83
+ up_pad = pad_height
84
+
85
+ elif trans_dict['pad_mode'] == 'pad_right_down':
86
+ left_pad = 0
87
+ up_pad = 0
88
+
89
+ elif trans_dict['pad_mode'] == 'pad_center':
90
+ left_pad = pad_width // 2
91
+ up_pad = pad_height // 2
92
+
93
+ elif trans_dict['pad_mode'] == 'pad_border':
94
+ if random.randint(0, 1) == 0:
95
+ left_pad = pad_width
96
+ up_pad = pad_height
97
+ else:
98
+ left_pad = 0
99
+ up_pad = 0
100
+ else:
101
+ raise ValueError("mode not define")
102
+ exit(1)
103
+
104
+ pad = (left_pad, pad_width-left_pad, up_pad, pad_height-up_pad)
105
+
106
+ batch[i]['img'] = DataContainer(F.pad(batch[i]['img'].data, pad=pad, value=0), stack=batch[i]['img'].stack)
107
+
108
+ if 'labelmap' in data_keys:
109
+ batch[i]['labelmap'] = DataContainer(F.pad(batch[i]['labelmap'].data, pad=pad, value=-1), stack=batch[i]['labelmap'].stack)
110
+
111
+ if 'maskmap' in data_keys:
112
+ batch[i]['maskmap'] = DataContainer(F.pad(batch[i]['maskmap'].data, pad=pad, value=0), stack=batch[i]['maskmap'].stack)
113
+
114
+ if 'distance_map' in data_keys:
115
+ batch[i]['distance_map'] = DataContainer(F.pad(batch[i]['distance_map'].data, pad=pad, value=255), stack=batch[i]['distance_map'].stack)
116
+
117
+ if 'angle_map' in data_keys:
118
+ batch[i]['angle_map'] = DataContainer(F.pad(batch[i]['angle_map'].data, pad=pad, value=0), stack=batch[i]['angle_map'].stack)
119
+
120
+ if 'mask_label_map' in data_keys:
121
+ batch[i]['mask_label_map'] = DataContainer(F.pad(batch[i]['mask_label_map'].data, pad=pad, value=-1), stack=batch[i]['mask_label_map'].stack)
122
+
123
+ if 'direction_label_map' in data_keys:
124
+ batch[i]['direction_label_map'] = DataContainer(F.pad(batch[i]['direction_label_map'].data, pad=pad, value=-1), stack=batch[i]['direction_label_map'].stack)
125
+
126
+ if 'multi_label_direction_map' in data_keys:
127
+ batch[i]['multi_label_direction_map'] = DataContainer(F.pad(batch[i]['multi_label_direction_map'].data, pad=pad, value=-1), stack=batch[i]['multi_label_direction_map'].stack)
128
+
129
+ if 'energy_label_map' in data_keys:
130
+ batch[i]['energy_label_map'] = DataContainer(F.pad(batch[i]['energy_label_map'].data, pad=pad, value=-1), stack=batch[i]['energy_label_map'].stack)
131
+
132
+ if 'offsetmap_h' in data_keys:
133
+ batch[i]['offsetmap_h'] = DataContainer(F.pad(batch[i]['offsetmap_h'].data, pad=pad, value=0), stack=batch[i]['offsetmap_h'].stack)
134
+
135
+ if 'offsetmap_w' in data_keys:
136
+ batch[i]['offsetmap_w'] = DataContainer(F.pad(batch[i]['offsetmap_w'].data, pad=pad, value=0), stack=batch[i]['offsetmap_w'].stack)
137
+
138
+ return dict({key: stack(batch, data_key=key) for key in data_keys})
139
+
140
+
141
+
142
+
143
+
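A sketch of how this collate() is meant to be wired into a PyTorch DataLoader through functools.partial; seg_dataset is a placeholder for any dataset that yields dicts of DataContainer-wrapped 'img'/'labelmap' entries, and the trans_dict values are illustrative.

from functools import partial
from torch.utils.data import DataLoader

trans_dict = dict(input_size=[512, 512],         # target (width, height)
                  align_method='scale_and_pad',  # 'only_scale', 'only_pad' or 'scale_and_pad'
                  pad_mode='pad_center')

loader = DataLoader(seg_dataset, batch_size=8, shuffle=True,
                    collate_fn=partial(collate, trans_dict=trans_dict))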
core/data/datasets/images/seg_data_tools/cv2_aug_transforms.py ADDED
@@ -0,0 +1,889 @@
1
+ import collections
2
+ import math
3
+ import random
4
+
5
+ import cv2
6
+ import numpy as np
7
+
8
+ class _BaseTransform(object):
9
+
10
+ DATA_ITEMS = (
11
+ 'labelmap', 'maskmap',
12
+ 'distance_map', 'angle_map', 'multi_label_direction_map',
13
+ 'boundary_map', 'offsetmap',
14
+ # 'offsetmap_h', 'offsetmap_w',
15
+ 'region_indexmap'
16
+ )
17
+
18
+ def __call__(self, img, **kwargs):
19
+
20
+ data_dict = collections.defaultdict(lambda: None)
21
+ data_dict.update(kwargs)
22
+
23
+ return img, data_dict
24
+
25
+ def _process(self, img, data_dict, skip_condition, *args, **kwargs):
26
+ assert isinstance(img, np.ndarray), \
27
+ "img should be numpy array, got {}.".format(type(img))
28
+ if not skip_condition:
29
+ img = self._process_img(img, *args, **kwargs)
30
+
31
+ ret_dict = collections.defaultdict(lambda: None)
32
+ for name in self.DATA_ITEMS:
33
+ func_name = '_process_' + name
34
+ x = data_dict[name]
35
+
36
+ assert isinstance(x, np.ndarray) or x is None, \
37
+ "{} should be numpy array or None, got {}.".format(
38
+ name, type(x))
39
+
40
+ if hasattr(self, func_name) and x is not None and not skip_condition:
41
+ ret_dict[name] = getattr(self, func_name)(x, *args, **kwargs)
42
+ else:
43
+ ret_dict[name] = x
44
+
45
+ return img, ret_dict
46
+
47
+
48
+ class Padding(_BaseTransform):
49
+ """ Padding the Image to proper size.
50
+ Args:
51
+ stride: the stride of the network.
52
+ pad_value: the value that pad to the image border.
53
+ img: Image object as input.
54
+ Returns::
55
+ img: Image object.
56
+ """
57
+
58
+ def __init__(self, pad=None, pad_ratio=0.5, mean=(104, 117, 123), allow_outside_center=True):
59
+ self.pad = pad
60
+ self.ratio = pad_ratio
61
+ self.mean = mean
62
+ self.allow_outside_center = allow_outside_center
63
+
64
+ def _pad(self, x, pad_value, height, width, target_size, offset_left, offset_up):
65
+ expand_x = np.zeros((
66
+ max(height, target_size[1]) + abs(offset_up),
67
+ max(width, target_size[0]) + abs(offset_left),
68
+ *x.shape[2:]
69
+ ), dtype=x.dtype)
70
+ expand_x[:, :] = pad_value
71
+ expand_x[
72
+ abs(min(offset_up, 0)):abs(min(offset_up, 0)) + height,
73
+ abs(min(offset_left, 0)):abs(min(offset_left, 0)) + width] = x
74
+ x = expand_x[
75
+ max(offset_up, 0):max(offset_up, 0) + target_size[1],
76
+ max(offset_left, 0):max(offset_left, 0) + target_size[0]
77
+ ]
78
+ return x
79
+
80
+ def _process_img(self, img, *args):
81
+ return self._pad(img, self.mean, *args)
82
+
83
+ def _process_labelmap(self, x, *args):
84
+ return self._pad(x, 255, *args)
85
+
86
+ def _process_region_indexmap(self, x, *args):
87
+ return self._pad(x, 0, *args)
88
+
89
+ def _process_maskmap(self, x, *args):
90
+ return self._pad(x, 1, *args)
91
+
92
+ def _process_distance_map(self, x, *args):
93
+ return self._pad(x, 255, *args)
94
+
95
+ def _process_angle_map(self, x, *args):
96
+ return self._pad(x, 0, *args)
97
+
98
+ def _process_boundary_map(self, x, *args):
99
+ return self._pad(x, 0, *args)
100
+
101
+ def _process_multi_label_direction_map(self, x, *args):
102
+ return self._pad(x, 0, *args)
103
+
104
+ # def _process_offsetmap_h(self, x, *args):
105
+ # return self._pad(x, 0, *args)
106
+
107
+ # def _process_offsetmap_w(self, x, *args):
108
+ # return self._pad(x, 0, *args)
109
+
110
+ def _process_offsetmap(self, x, *args):
111
+ return self._pad(x, 0, *args)
112
+
113
+ def __call__(self, img, **kwargs):
114
+ img, data_dict = super().__call__(img, **kwargs)
115
+
116
+ height, width, channels = img.shape
117
+ left_pad, up_pad, right_pad, down_pad = self.pad
118
+
119
+ target_size = [width + left_pad +
120
+ right_pad, height + up_pad + down_pad]
121
+ offset_left = -left_pad
122
+ offset_up = -up_pad
123
+
124
+ return self._process(
125
+ img, data_dict,
126
+ random.random() > self.ratio,
127
+ height, width, target_size, offset_left, offset_up
128
+ )
129
+
130
+
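A quick sanity check of the Padding geometry above (pad is (left, up, right, down); images are filled with mean, labelmaps with the ignore value 255); the sizes are illustrative:

import numpy as np

pad = Padding(pad=(10, 20, 30, 40), pad_ratio=1.0, mean=(104, 117, 123))
img = np.zeros((100, 200, 3), dtype=np.uint8)
labelmap = np.zeros((100, 200), dtype=np.uint8)
img, out = pad(img, labelmap=labelmap)
print(img.shape, out['labelmap'].shape)   # (160, 240, 3) (160, 240)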
131
+ class RandomHFlip(_BaseTransform):
132
+ def __init__(self, swap_pair=None, flip_ratio=0.5):
133
+ self.swap_pair = swap_pair
134
+ self.ratio = flip_ratio
135
+
136
+ def _process_img(self, img):
137
+ return cv2.flip(img, 1)
138
+
139
+ def _process_labelmap(self, labelmap):
140
+ labelmap = cv2.flip(labelmap, 1)
141
+ # to handle datasets with left/right annotations
142
+ if self.swap_pair is not None:
143
+ assert isinstance(self.swap_pair, (tuple, list))
144
+ temp = labelmap.copy()
145
+ for pair in self.swap_pair:
146
+ assert isinstance(pair, (tuple, list)) and len(pair) == 2
147
+ labelmap[temp == pair[0]] = pair[1]
148
+ labelmap[temp == pair[1]] = pair[0]
149
+
150
+ return labelmap
151
+
152
+ def _process_region_indexmap(self, labelmap):
153
+ return cv2.flip(labelmap, 1)
154
+
155
+ def _process_maskmap(self, x):
156
+ return cv2.flip(x, 1)
157
+
158
+ def _process_distance_map(self, x):
159
+ return cv2.flip(x, 1)
160
+
161
+ def _process_angle_map(self, angle_map):
162
+ ret_angle_map = angle_map.copy()
163
+ mask = (angle_map > 0) & (angle_map < 180)
164
+ ret_angle_map[mask] = 180 - angle_map[mask]
165
+ mask = (angle_map < 0) & (angle_map > -180)
166
+ ret_angle_map[mask] = - (180 + angle_map[mask])
167
+ ret_angle_map = cv2.flip(ret_angle_map, 1)
168
+ return ret_angle_map
169
+
170
+ def _process_boundary_map(self, x):
171
+ return cv2.flip(x, 1)
172
+
173
+ def _process_multi_label_direction_map(self, multi_label_direction_map):
174
+ perm = [4, 3, 2, 1, 0, 7, 6, 5]
175
+ multi_label_direction_map = cv2.flip(multi_label_direction_map, 1)
176
+ multi_label_direction_map = multi_label_direction_map[..., perm]
177
+ return multi_label_direction_map
178
+
179
+ # def _process_offsetmap_h(self, x):
180
+ # return cv2.flip(x, 1)
181
+
182
+ # def _process_offsetmap_w(self, x):
183
+ # return -cv2.flip(x, 1)
184
+
185
+ def _process_offsetmap_w(self, x):
186
+ x = cv2.flip(x, 1)
187
+ x[..., 1] = -x[..., 1]
188
+ return x
189
+
190
+ def __call__(self, img, **kwargs):
191
+ img, data_dict = super().__call__(img, **kwargs)
192
+
193
+ return self._process(
194
+ img, data_dict,
195
+ random.random() > self.ratio
196
+ )
197
+
198
+
199
+ class RandomSaturation(_BaseTransform):
200
+ def __init__(self, lower=0.5, upper=1.5, saturation_ratio=0.5):
201
+ self.lower = lower
202
+ self.upper = upper
203
+ self.ratio = saturation_ratio
204
+ assert self.upper >= self.lower, "saturation upper must be >= lower."
205
+ assert self.lower >= 0, "saturation lower must be non-negative."
206
+
207
+ def _process_img(self, img):
208
+ img = img.astype(np.float32)
209
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
210
+ img[:, :, 1] *= random.uniform(self.lower, self.upper)
211
+ img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
212
+ img = np.clip(img, 0, 255).astype(np.uint8)
213
+ return img
214
+
215
+ def __call__(self, img, **kwargs):
216
+ img, data_dict = super().__call__(img, **kwargs)
217
+
218
+ return self._process(
219
+ img, data_dict,
220
+ random.random() > self.ratio
221
+ )
222
+
223
+
224
+ class RandomHue(_BaseTransform):
225
+ def __init__(self, delta=18, hue_ratio=0.5):
226
+ assert 0 <= delta <= 360
227
+ self.delta = delta
228
+ self.ratio = hue_ratio
229
+
230
+ def _process_img(self, img):
231
+ img = img.astype(np.float32)
232
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
233
+ img[:, :, 0] += random.uniform(-self.delta, self.delta)
234
+ img[:, :, 0][img[:, :, 0] > 360] -= 360
235
+ img[:, :, 0][img[:, :, 0] < 0] += 360
236
+ img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
237
+ img = np.clip(img, 0, 255).astype(np.uint8)
238
+ return img
239
+
240
+ def __call__(self, img, **kwargs):
241
+ img, data_dict = super().__call__(img, **kwargs)
242
+
243
+ return self._process(
244
+ img, data_dict,
245
+ random.random() > self.ratio
246
+ )
247
+
248
+
249
+ class RandomPerm(_BaseTransform):
250
+ def __init__(self, perm_ratio=0.5):
251
+ self.ratio = perm_ratio
252
+ self.perms = ((0, 1, 2), (0, 2, 1),
253
+ (1, 0, 2), (1, 2, 0),
254
+ (2, 0, 1), (2, 1, 0))
255
+
256
+ def _process_img(self, img):
257
+ swap = self.perms[random.randint(0, len(self.perms) - 1)]
258
+ img = img[:, :, swap].astype(np.uint8)
259
+ return img
260
+
261
+ def __call__(self, img, **kwargs):
262
+ img, data_dict = super().__call__(img, **kwargs)
263
+
264
+ return self._process(
265
+ img, data_dict,
266
+ random.random() > self.ratio
267
+ )
268
+
269
+
270
+ class RandomContrast(_BaseTransform):
271
+ def __init__(self, lower=0.5, upper=1.5, contrast_ratio=0.5):
272
+ self.lower = lower
273
+ self.upper = upper
274
+ self.ratio = contrast_ratio
275
+ assert self.upper >= self.lower, "contrast upper must be >= lower."
276
+ assert self.lower >= 0, "contrast lower must be non-negative."
277
+
278
+ def _process_img(self, img):
279
+ img = img.astype(np.float32)
280
+ img *= random.uniform(self.lower, self.upper)
281
+ img = np.clip(img, 0, 255).astype(np.uint8)
282
+ return img
283
+
284
+ def __call__(self, img, **kwargs):
285
+ img, data_dict = super().__call__(img, **kwargs)
286
+
287
+ return self._process(
288
+ img, data_dict,
289
+ random.random() > self.ratio
290
+ )
291
+
292
+
293
+ class RandomBrightness(_BaseTransform):
294
+ def __init__(self, shift_value=30, brightness_ratio=0.5):
295
+ self.shift_value = shift_value
296
+ self.ratio = brightness_ratio
297
+
298
+ def _process_img(self, img):
299
+ img = img.astype(np.float32)
300
+ shift = random.randint(-self.shift_value, self.shift_value)
301
+ img[:, :, :] += shift
302
+ img = np.around(img)
303
+ img = np.clip(img, 0, 255).astype(np.uint8)
304
+ return img
305
+
306
+ def __call__(self, img, **kwargs):
307
+ img, data_dict = super().__call__(img, **kwargs)
308
+
309
+ return self._process(
310
+ img, data_dict,
311
+ random.random() > self.ratio
312
+ )
313
+
314
+
315
+ class RandomResize(_BaseTransform):
316
+ """Resize the given numpy.ndarray to random size and aspect ratio.
317
+
318
+ Args:
319
+ scale_min: the min scale to resize.
320
+ scale_max: the max scale to resize.
321
+ """
322
+
323
+ def __init__(self, scale_range=(0.75, 1.25), aspect_range=(0.9, 1.1), target_size=None,
324
+ resize_bound=None, method='random', max_side_bound=None, scale_list=None, resize_ratio=0.5):
325
+ self.scale_range = scale_range
326
+ self.aspect_range = aspect_range
327
+ self.resize_bound = resize_bound
328
+ self.max_side_bound = max_side_bound
329
+ self.scale_list = scale_list
330
+ self.method = method
331
+ self.ratio = resize_ratio
332
+
333
+ if target_size is not None:
334
+ if isinstance(target_size, int):
335
+ self.input_size = (target_size, target_size)
336
+ elif isinstance(target_size, (list, tuple)) and len(target_size) == 2:
337
+ self.input_size = target_size
338
+ else:
339
+ raise TypeError(
340
+ 'Got inappropriate size arg: {}'.format(target_size))
341
+ else:
342
+ self.input_size = None
343
+
344
+ def get_scale(self, img_size):
345
+ if self.method == 'random':
346
+ scale_ratio = random.uniform(
347
+ self.scale_range[0], self.scale_range[1])
348
+ return scale_ratio
349
+
350
+ elif self.method == 'bound':
351
+ scale1 = self.resize_bound[0] / min(img_size)
352
+ scale2 = self.resize_bound[1] / max(img_size)
353
+ scale = min(scale1, scale2)
354
+ return scale
355
+
356
+ else:
357
+ raise ValueError("invalid method")
358
+ exit(1)
359
+
360
+ def _process_img(self, img, converted_size, *args):
361
+ return cv2.resize(img, converted_size, interpolation=cv2.INTER_CUBIC).astype(np.uint8)
362
+
363
+ def _process_labelmap(self, x, converted_size, *args):
364
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
365
+
366
+ def _process_region_indexmap(self, x, converted_size, *args):
367
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
368
+
369
+ def _process_maskmap(self, x, converted_size, *args):
370
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
371
+
372
+ def _process_distance_map(self, x, converted_size, *args):
373
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
374
+
375
+ def _process_angle_map(self, x, converted_size, *args):
376
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
377
+
378
+ def _process_boundary_map(self, x, converted_size, *args):
379
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
380
+
381
+ def _process_multi_label_direction_map(self, x, converted_size, *args):
382
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
383
+
384
+ # def _process_offsetmap_h(self, x, converted_size, h_scale_ratio, w_scale_ratio):
385
+ # return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) * h_scale_ratio
386
+
387
+ # def _process_offsetmap_w(self, x, converted_size, h_scale_ratio, w_scale_ratio):
388
+ # return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST) * w_scale_ratio
389
+
390
+ def _process_offsetmap(self, x, converted_size, h_scale_ratio, w_scale_ratio):
391
+ return cv2.resize(x, converted_size, interpolation=cv2.INTER_NEAREST)
392
+
393
+ def __call__(self, img, **kwargs):
394
+ """
395
+ Args:
396
+ img (Image): Image to be resized.
397
+ maskmap (Image): Mask to be resized.
398
+ kpt (list): keypoints to be resized.
399
+ center: (list): center points to be resized.
400
+
401
+ Returns:
402
+ Image: Randomly resize image.
403
+ Image: Randomly resize maskmap.
404
+ list: Randomly resize keypoints.
405
+ list: Randomly resize center points.
406
+ """
407
+ img, data_dict = super().__call__(img, **kwargs)
408
+
409
+ height, width, _ = img.shape
410
+ if self.scale_list is None:
411
+ scale_ratio = self.get_scale([width, height])
412
+ else:
413
+ scale_ratio = self.scale_list[random.randint(
414
+ 0, len(self.scale_list)-1)]
415
+
416
+ aspect_ratio = random.uniform(*self.aspect_range)
417
+ w_scale_ratio = math.sqrt(aspect_ratio) * scale_ratio
418
+ h_scale_ratio = math.sqrt(1.0 / aspect_ratio) * scale_ratio
419
+ if self.max_side_bound is not None and max(height*h_scale_ratio, width*w_scale_ratio) > self.max_side_bound:
420
+ d_ratio = self.max_side_bound / max(height * h_scale_ratio, width * w_scale_ratio)
421
+ w_scale_ratio *= d_ratio
422
+ h_scale_ratio *= d_ratio
423
+
424
+ converted_size = (int(width * w_scale_ratio),
425
+ int(height * h_scale_ratio))
426
+ return self._process(
427
+ img, data_dict,
428
+ random.random() > self.ratio,
429
+ converted_size, h_scale_ratio, w_scale_ratio
430
+ )
431
+
432
+
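A compact numeric sketch of how RandomResize turns its scale_range/aspect_range samples into the size handed to cv2.resize (values illustrative):

import math
import random

random.seed(0)
scale_ratio = random.uniform(0.75, 1.25)                # method='random'
aspect_ratio = random.uniform(0.9, 1.1)
w_scale = math.sqrt(aspect_ratio) * scale_ratio
h_scale = math.sqrt(1.0 / aspect_ratio) * scale_ratio   # w_scale * h_scale == scale_ratio ** 2
width, height = 2048, 1024
converted_size = (int(width * w_scale), int(height * h_scale))
print(converted_size)                                   # fed to cv2.resize as (w, h)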
433
+ class RandomRotate(_BaseTransform):
434
+ """Rotate the input numpy.ndarray and points to the given degree.
435
+
436
+ Args:
437
+ degree (number): Desired rotate degree.
438
+ """
439
+
440
+ def __init__(self, max_degree, rotate_ratio=0.5, mean=(104, 117, 123)):
441
+ assert isinstance(max_degree, int)
442
+ self.max_degree = max_degree
443
+ self.ratio = rotate_ratio
444
+ self.mean = mean
445
+
446
+ def _warp(self, x, border_value, rotate_mat, new_width, new_height):
447
+ return cv2.warpAffine(x, rotate_mat, (new_width, new_height), borderValue=border_value)
448
+
449
+ def _process_img(self, x, *args):
450
+ return self._warp(x, self.mean, *args).astype(np.uint8)
451
+
452
+ def _process_labelmap(self, x, *args):
453
+ return self._warp(x, (255, 255, 255), *args).astype(np.uint8)
454
+
455
+ def _process_maskmap(self, x, *args):
456
+ return self._warp(x, (1, 1, 1), *args).astype(np.uint8)
457
+
458
+ def __call__(self, img, **kwargs):
459
+ """
460
+ Args:
461
+ img (Image): Image to be rotated.
462
+ maskmap (Image): Mask to be rotated.
463
+ kpt (list): Keypoints to be rotated.
464
+ center (list): Center points to be rotated.
465
+
466
+ Returns:
467
+ Image: Rotated image.
468
+ list: Rotated key points.
469
+ """
470
+ img, data_dict = super().__call__(img, **kwargs)
471
+
472
+ rotate_degree = random.uniform(-self.max_degree, self.max_degree)
473
+ height, width, _ = img.shape
474
+ img_center = (width / 2.0, height / 2.0)
475
+ rotate_mat = cv2.getRotationMatrix2D(img_center, rotate_degree, 1.0)
476
+ cos_val = np.abs(rotate_mat[0, 0])
477
+ sin_val = np.abs(rotate_mat[0, 1])
478
+ new_width = int(height * sin_val + width * cos_val)
479
+ new_height = int(height * cos_val + width * sin_val)
480
+ rotate_mat[0, 2] += (new_width / 2.) - img_center[0]
481
+ rotate_mat[1, 2] += (new_height / 2.) - img_center[1]
482
+
483
+ return self._process(
484
+ img, data_dict,
485
+ random.random() > self.ratio,
486
+ rotate_mat, new_width, new_height
487
+ )
488
+
489
+
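For reference, the canvas-expansion math in RandomRotate above works out as follows for a 640x480 image rotated by 30 degrees (a standalone sketch):

import cv2
import numpy as np

height, width, degree = 480, 640, 30
center = (width / 2.0, height / 2.0)
rotate_mat = cv2.getRotationMatrix2D(center, degree, 1.0)
cos_val, sin_val = np.abs(rotate_mat[0, 0]), np.abs(rotate_mat[0, 1])
new_width = int(height * sin_val + width * cos_val)     # ~794
new_height = int(height * cos_val + width * sin_val)    # ~735
rotate_mat[0, 2] += new_width / 2.0 - center[0]         # recenter so nothing is cut off
rotate_mat[1, 2] += new_height / 2.0 - center[1]
print(new_width, new_height)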
490
+ class RandomCrop(_BaseTransform):
491
+ """Crop the given numpy.ndarray and at a random location.
492
+
493
+ Args:
494
+ size (int or tuple): Desired output size of the crop.(w, h)
495
+ """
496
+
497
+ def __init__(self, crop_size, crop_ratio=0.5, method='random', grid=None, allow_outside_center=True):
498
+ self.ratio = crop_ratio
499
+ self.method = method
500
+ self.grid = grid
501
+ self.allow_outside_center = allow_outside_center
502
+
503
+ if isinstance(crop_size, float):
504
+ self.size = (crop_size, crop_size)
505
+ elif isinstance(crop_size, (list, tuple)) and len(crop_size) == 2:  # collections.Iterable was removed in Python 3.10
506
+ self.size = crop_size
507
+ else:
508
+ raise TypeError('Got inappropriate size arg: {}'.format(crop_size))
509
+
510
+ def get_lefttop(self, crop_size, img_size):
511
+ if self.method == 'center':
512
+ return [(img_size[0] - crop_size[0]) // 2, (img_size[1] - crop_size[1]) // 2]
513
+
514
+ elif self.method == 'random':
515
+ x = random.randint(0, img_size[0] - crop_size[0])
516
+ y = random.randint(0, img_size[1] - crop_size[1])
517
+ return [x, y]
518
+
519
+ elif self.method == 'grid':
520
+ grid_x = random.randint(0, self.grid[0] - 1)
521
+ grid_y = random.randint(0, self.grid[1] - 1)
522
+ x = grid_x * ((img_size[0] - crop_size[0]) // (self.grid[0] - 1))
523
+ y = grid_y * ((img_size[1] - crop_size[1]) // (self.grid[1] - 1))
524
+ return [x, y]
525
+
526
+ else:
527
+ raise ValueError('invalid crop method: {}'.format(self.method))
529
+
530
+ def _crop(self, x, offset_up, offset_left, target_size):
531
+ return x[offset_up:offset_up + target_size[1], offset_left:offset_left + target_size[0]]
532
+
533
+ def _process_img(self, img, *args):
534
+ return self._crop(img, *args)
535
+
536
+ def _process_labelmap(self, x, *args):
537
+ return self._crop(x, *args)
538
+
539
+ def _process_region_indexmap(self, x, *args):
540
+ return self._crop(x, *args)
541
+
542
+ def _process_maskmap(self, x, *args):
543
+ return self._crop(x, *args)
544
+
545
+ def _process_distance_map(self, x, *args):
546
+ return self._crop(x, *args)
547
+
548
+ def _process_angle_map(self, x, *args):
549
+ return self._crop(x, *args)
550
+
551
+ def _process_boundary_map(self, x, *args):
552
+ return self._crop(x, *args)
553
+
554
+ def _process_multi_label_direction_map(self, x, *args):
555
+ return self._crop(x, *args)
556
+
557
+ # def _process_offsetmap_h(self, x, *args):
558
+ # return self._crop(x, *args)
559
+
560
+ # def _process_offsetmap_w(self, x, *args):
561
+ # return self._crop(x, *args)
562
+
563
+ def _process_offsetmap(self, x, *args):
564
+ return self._crop(x, *args)
565
+
566
+ def __call__(self, img, **kwargs):
567
+ """
568
+ Args:
569
+ img (Image): Image to be cropped.
570
+ maskmap (Image): Mask to be cropped.
571
+
572
+ Returns:
573
+ Image: Cropped image.
574
+ Image: Cropped maskmap.
575
+ list: Cropped keypoints.
576
+ list: Cropped center points.
577
+ """
578
+ img, data_dict = super().__call__(img, **kwargs)
579
+
580
+ height, width, _ = img.shape
581
+ target_size = [min(self.size[0], width), min(self.size[1], height)]
582
+
583
+ offset_left, offset_up = self.get_lefttop(target_size, [width, height])
584
+ return self._process(
585
+ img, data_dict,
586
+ random.random() > self.ratio,
587
+ offset_up, offset_left, target_size
588
+ )
589
+
590
+
591
+ class Resize(RandomResize):
592
+ """Resize the given numpy.ndarray to random size and aspect ratio.
593
+ Args:
594
+ scale_min: the min scale to resize.
595
+ scale_max: the max scale to resize.
596
+ """
597
+
598
+ def __init__(self, target_size=None, min_side_length=None, max_side_length=None, max_side_bound=None):
599
+ self.target_size = target_size
600
+ self.min_side_length = min_side_length
601
+ self.max_side_length = max_side_length
602
+ self.max_side_bound = max_side_bound
603
+
604
+ def __call__(self, img, **kwargs):
605
+ img, data_dict = super(RandomResize, self).__call__(img, **kwargs)
606
+
607
+ height, width, _ = img.shape
608
+ if self.target_size is not None:
609
+ target_size = self.target_size
610
+ w_scale_ratio = self.target_size[0] / width
611
+ h_scale_ratio = self.target_size[1] / height
612
+
613
+ elif self.min_side_length is not None:
614
+ scale_ratio = self.min_side_length / min(width, height)
615
+ w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio
616
+ target_size = [int(round(width * w_scale_ratio)),
617
+ int(round(height * h_scale_ratio))]
618
+
619
+ else:
620
+ scale_ratio = self.max_side_length / max(width, height)
621
+ w_scale_ratio, h_scale_ratio = scale_ratio, scale_ratio
622
+ target_size = [int(round(width * w_scale_ratio)),
623
+ int(round(height * h_scale_ratio))]
624
+
625
+ if self.max_side_bound is not None and max(target_size) > self.max_side_bound:
626
+ d_ratio = self.max_side_bound / max(target_size)
627
+ w_scale_ratio = d_ratio * w_scale_ratio
628
+ h_scale_ratio = d_ratio * h_scale_ratio
629
+ target_size = [int(round(width * w_scale_ratio)),
630
+ int(round(height * h_scale_ratio))]
631
+
632
+ target_size = tuple(target_size)
633
+ return self._process(
634
+ img, data_dict,
635
+ False,
636
+ target_size, h_scale_ratio, w_scale_ratio
637
+ )
638
+
639
+
640
+ class CV2AugCompose(object):
641
+ """Composes several transforms together.
642
+
643
+ Args:
644
+ transforms (list of ``Transform`` objects): list of transforms to compose.
645
+
646
+ Example:
647
+ >>> composed = CV2AugCompose(configer, split='train')
+ >>> img, labelmap = composed(img, labelmap=labelmap)
+ >>> # the transform sequence is read from the 'train_trans' / 'val_trans' config entries
650
+ """
651
+
652
+ def __init__(self, configer, split='train'):
653
+ self.configer = configer
654
+ self.split = split
655
+
656
+ if self.split == 'train':
657
+ shuffle_train_trans = []
658
+ if self.configer.exists('train_trans', 'shuffle_trans_seq'):
659
+ if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list):
660
+ train_trans_seq_list = self.configer.get(
661
+ 'train_trans', 'shuffle_trans_seq')
662
+ for train_trans_seq in train_trans_seq_list:
663
+ shuffle_train_trans += train_trans_seq
664
+
665
+ else:
666
+ shuffle_train_trans = self.configer.get(
667
+ 'train_trans', 'shuffle_trans_seq')
668
+ trans_seq = self.configer.get(
669
+ 'train_trans', 'trans_seq') + shuffle_train_trans
670
+ trans_key = 'train_trans'
671
+ else:
672
+ trans_seq = self.configer.get('val_trans', 'trans_seq')
673
+ trans_key = 'val_trans'
674
+
675
+ self.transforms = dict()
676
+ self.trans_config = self.configer.get(trans_key)
677
+ for trans_name in trans_seq:
678
+ specs = TRANSFORM_SPEC[trans_name]
679
+ config = self.configer.get(trans_key, trans_name)
680
+ for spec in specs:
681
+ if 'when' not in spec:
682
+ break
683
+ choose_this = True
684
+ for cond_key, cond_value in spec['when'].items():
685
+ choose_this = choose_this and (
686
+ config[cond_key] == cond_value)
687
+ if choose_this:
688
+ break
689
+ else:
690
+ raise RuntimeError("Not support!")
691
+
692
+ kwargs = {}
693
+ for arg_name, arg_path in spec["args"].items():
694
+ if isinstance(arg_path, str):
695
+ arg_value = config.get(arg_path, None)
696
+ elif isinstance(arg_path, list):
697
+ arg_value = self.configer.get(*arg_path)
698
+ kwargs[arg_name] = arg_value
699
+
700
+ klass = TRANSFORM_MAPPING[trans_name]
701
+ self.transforms[trans_name] = klass(**kwargs)
702
+
703
+ def __call__(self, img, **data_dict):
704
+
705
+ orig_key_list = list(data_dict)
706
+
707
+ if self.configer.get('data', 'input_mode') == 'RGB':
708
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
709
+
710
+ if self.split == 'train':
711
+ shuffle_trans_seq = []
712
+ if self.configer.exists('train_trans', 'shuffle_trans_seq'):
713
+ if isinstance(self.configer.get('train_trans', 'shuffle_trans_seq')[0], list):
714
+ shuffle_trans_seq_list = self.configer.get('train_trans', 'shuffle_trans_seq')
715
+ shuffle_trans_seq = shuffle_trans_seq_list[random.randint(0, len(shuffle_trans_seq_list) - 1)]
716
+ else:
717
+ shuffle_trans_seq = self.configer.get('train_trans', 'shuffle_trans_seq')
718
+ random.shuffle(shuffle_trans_seq)
719
+ trans_seq = shuffle_trans_seq + self.configer.get('train_trans', 'trans_seq')
720
+ else:
721
+ trans_seq = self.configer.get('val_trans', 'trans_seq')
722
+
723
+ for trans_key in trans_seq:
724
+ img, data_dict = self.transforms[trans_key](img, **data_dict)
725
+
726
+ if self.configer.get('data', 'input_mode') == 'RGB':
727
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
728
+
729
+ return (img, *[data_dict[key] for key in orig_key_list])
730
+
731
+ def __repr__(self):
732
+ import pprint
733
+ return 'CV2AugCompose({})'.format(pprint.pformat(self.trans_config))
734
+
735
+
736
+ TRANSFORM_MAPPING = {
737
+ "random_saturation": RandomSaturation,
738
+ "random_hue": RandomHue,
739
+ "random_perm": RandomPerm,
740
+ "random_contrast": RandomContrast,
741
+ "padding": Padding,
742
+ "random_brightness": RandomBrightness,
743
+ "random_hflip": RandomHFlip,
744
+ "random_resize": RandomResize,
745
+ "random_crop": RandomCrop,
746
+ "random_rotate": RandomRotate,
747
+ "resize": Resize,
748
+ }
749
+
750
+ TRANSFORM_SPEC = {
751
+ "random_style": [{
752
+ "args": {
753
+ "style_ratio": "ratio"
754
+ }
755
+ }],
756
+ "random_saturation": [{
757
+ "args": {
758
+ "lower": "lower",
759
+ "upper": "upper",
760
+ "saturation_ratio": "ratio"
761
+ }
762
+ }],
763
+ "random_hue": [{
764
+ "args": {
765
+ "delta": "delta",
766
+ "hue_ratio": "ratio"
767
+ }
768
+ }],
769
+ "ramdom_perm": [{
770
+ "args": {
771
+ "perm_ratio": "ratio"
772
+ }
773
+ }],
774
+ "random_contrast": [{
775
+ "args": {
776
+ "lower": "lower",
777
+ "upper": "upper",
778
+ "contrast_ratio": "ratio"
779
+ }
780
+ }],
781
+ "padding": [{
782
+ "args": {
783
+ "pad": "pad",
784
+ "pad_ratio": "ratio",
785
+ "mean": ["normalize", "mean_value"],
786
+ "allow_outside_center": "allow_outside_center"
787
+ }
788
+ }],
789
+ "random_brightness": [{
790
+ "args": {
791
+ "shift_value": "shift_value",
792
+ "brightness_ratio": "ratio"
793
+ }
794
+ }],
795
+ "random_hflip": [{
796
+ "args": {
797
+ "swap_pair": "swap_pair",
798
+ "flip_ratio": "ratio"
799
+ }
800
+ }],
801
+ "random_resize": [
802
+ {
803
+ "args": {
804
+ "method": "method",
805
+ "scale_range": "scale_range",
806
+ "aspect_range": "aspect_range",
807
+ "max_side_bound": "max_side_bound",
808
+ "resize_ratio": "ratio"
809
+ },
810
+ "when": {
811
+ "method": "random"
812
+ }
813
+ },
814
+ {
815
+ "args": {
816
+ "method": "method",
817
+ "scale_range": "scale_range",
818
+ "aspect_range": "aspect_range",
819
+ "target_size": "target_size",
820
+ "resize_ratio": "ratio"
821
+ },
822
+ "when": {
823
+ "method": "focus"
824
+ }
825
+ },
826
+ {
827
+ "args": {
828
+ "method": "method",
829
+ "aspect_range": "aspect_range",
830
+ "resize_bound": "resize_bound",
831
+ "resize_ratio": "ratio"
832
+ },
833
+ "when": {
834
+ "method": "bound"
835
+ }
836
+ },
837
+ ],
838
+ "random_crop": [
839
+ {
840
+ "args": {
841
+ "crop_size": "crop_size",
842
+ "method": "method",
843
+ "crop_ratio": "ratio",
844
+ "allow_outside_center": "allow_outside_center"
845
+ },
846
+ "when": {
847
+ "method": "random"
848
+ }
849
+ },
850
+ {
851
+ "args": {
852
+ "crop_size": "crop_size",
853
+ "method": "method",
854
+ "crop_ratio": "ratio",
855
+ "allow_outside_center": "allow_outside_center"
856
+ },
857
+ "when": {
858
+ "method": "center"
859
+ }
860
+ },
861
+ {
862
+ "args": {
863
+ "crop_size": "crop_size",
864
+ "method": "method",
865
+ "crop_ratio": "ratio",
866
+ "grid": "grid",
867
+ "allow_outside_center": "allow_outside_center"
868
+ },
869
+ "when": {
870
+ "method": "grid"
871
+ }
872
+ },
873
+ ],
874
+ "random_rotate": [{
875
+ "args": {
876
+ "max_degree": "rotate_degree",
877
+ "rotate_ratio": "ratio",
878
+ "mean": ["normalize", "mean_value"]
879
+ }
880
+ }],
881
+ "resize": [{
882
+ "args": {
883
+ "target_size": "target_size",
884
+ "min_side_length": "min_side_length",
885
+ "max_side_bound": "max_side_bound",
886
+ "max_side_length": "max_side_length"
887
+ }
888
+ }],
889
+ }
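The two tables above are what drive CV2AugCompose.__init__: each TRANSFORM_SPEC entry maps constructor argument names to config keys, and TRANSFORM_MAPPING supplies the class. A minimal sketch of that resolution outside the configer, with made-up config values (the `config` dict below is illustrative only, not taken from this repo):

from core.data.datasets.images.seg_data_tools.cv2_aug_transforms import (
    TRANSFORM_MAPPING, TRANSFORM_SPEC)

# Hypothetical values; in the real pipeline they come from configer.get('train_trans', 'random_crop').
config = {"crop_size": [512, 512], "method": "random",
          "ratio": 0.5, "allow_outside_center": True}

spec = TRANSFORM_SPEC["random_crop"][0]            # the entry whose "when" matches method == "random"
kwargs = {arg: config.get(path) for arg, path in spec["args"].items()
          if isinstance(path, str)}                # list-valued paths are resolved via configer.get(*path)
crop = TRANSFORM_MAPPING["random_crop"](**kwargs)  # RandomCrop(crop_size=[512, 512], crop_ratio=0.5, ...)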
core/data/datasets/images/seg_data_tools/transforms.py ADDED
@@ -0,0 +1,106 @@
1
+ import numpy as np
2
+ import torch
3
+ from PIL import Image
4
+
5
+
6
+ class Normalize(object):
7
+ """Normalize a ``torch.tensor``
8
+
9
+ Args:
10
+ inputs (torch.tensor): tensor to be normalized.
11
+ mean: (list): the mean of RGB
12
+ std: (list): the std of RGB
13
+
14
+ Returns:
15
+ Tensor: Normalized tensor.
16
+ """
17
+ def __init__(self, div_value, mean, std):
18
+ self.div_value = div_value
19
+ self.mean = mean
20
+ self.std = std
21
+
22
+ def __call__(self, inputs):
23
+ inputs = inputs.div(self.div_value)
24
+ for t, m, s in zip(inputs, self.mean, self.std):
25
+ t.sub_(m).div_(s)
26
+
27
+ return inputs
28
+
29
+
30
+ class DeNormalize(object):
31
+ """DeNormalize a ``torch.tensor``
32
+
33
+ Args:
34
+ inputs (torch.tensor): tensor to be normalized.
35
+ mean: (list): the mean of RGB
36
+ std: (list): the std of RGB
37
+
38
+ Returns:
39
+ Tensor: Denormalized tensor.
40
+ """
41
+ def __init__(self, div_value, mean, std):
42
+ self.div_value = div_value
43
+ self.mean = mean
44
+ self.std = std
45
+
46
+ def __call__(self, inputs):
47
+ result = inputs.clone()
48
+ for i in range(result.size(0)):
49
+ result[i, :, :] = result[i, :, :] * self.std[i] + self.mean[i]
50
+
51
+ return result.mul_(self.div_value)
52
+
53
+
54
+ class ToTensor(object):
55
+ """Convert a ``numpy.ndarray or Image`` to tensor.
56
+
57
+ See ``ToTensor`` for more details.
58
+
59
+ Args:
60
+ inputs (numpy.ndarray or Image): Image to be converted to tensor.
61
+
62
+ Returns:
63
+ Tensor: Converted image.
64
+ """
65
+ def __call__(self, inputs):
66
+ if isinstance(inputs, Image.Image):
67
+ channels = len(inputs.mode)
68
+ inputs = np.array(inputs)
69
+ inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], channels)
70
+ inputs = torch.from_numpy(inputs.transpose(2, 0, 1))
71
+ else:
72
+ inputs = torch.from_numpy(inputs.transpose(2, 0, 1))
73
+
74
+ return inputs.float()
75
+
76
+
77
+ class ToLabel(object):
78
+ def __call__(self, inputs):
79
+ return torch.from_numpy(np.array(inputs)).long()
80
+
81
+
82
+ class ReLabel(object):
83
+ """
84
+ Relabel pixels equal to ``olabel`` (e.g. 255 for the background) to ``nlabel``.
85
+ """
86
+ def __init__(self, olabel, nlabel):
87
+ self.olabel = olabel
88
+ self.nlabel = nlabel
89
+
90
+ def __call__(self, inputs):
91
+ assert isinstance(inputs, torch.LongTensor), 'tensor needs to be LongTensor'
92
+
93
+ inputs[inputs == self.olabel] = self.nlabel
94
+ return inputs
95
+
96
+
97
+ class Compose(object):
98
+
99
+ def __init__(self, transforms):
100
+ self.transforms = transforms
101
+
102
+ def __call__(self, inputs):
103
+ for t in self.transforms:
104
+ inputs = t(inputs)
105
+
106
+ return inputs
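A hypothetical end-to-end use of the tensor transforms defined above (the image size, div_value, mean and std below are illustrative, not taken from any config in this repo):

import numpy as np
from core.data.datasets.images.seg_data_tools.transforms import Compose, Normalize, ToTensor

img = np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8)   # fake HWC image
pipeline = Compose([
    ToTensor(),                                  # HWC uint8 -> CHW float tensor
    Normalize(div_value=255.0,
              mean=[0.485, 0.456, 0.406],
              std=[0.229, 0.224, 0.225]),
])
tensor = pipeline(img)                           # float tensor of shape (3, 256, 256)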
core/data/datasets/images/seg_dataset_dev.py ADDED
@@ -0,0 +1,293 @@
1
+ import os
2
+ import os.path as osp
3
+ import cv2
4
+ import torch
5
+
6
+ import numpy as np
7
+ import itertools
8
+ from typing import Any, Dict, List, Tuple, Union
9
+ from torch.utils import data
10
+ from torch.nn import functional as F
11
+ from PIL import Image
12
+ from pathlib import Path
13
+
14
+ import core.data.transforms.seg_aug_dev as T
15
+ from core.data.transforms.seg_transforms_dev import AugInput, apply_transform_gens
16
+
17
+
18
+ class Instances:
19
+ """
20
+ This class represents a list of instances in an image.
21
+ It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields".
22
+ All fields must have the same ``__len__`` which is the number of instances.
23
+
24
+ All other (non-field) attributes of this class are considered private:
25
+ they must start with '_' and are not modifiable by a user.
26
+
27
+ Some basic usage:
28
+
29
+ 1. Set/get/check a field:
30
+
31
+ .. code-block:: python
32
+
33
+ instances.gt_boxes = Boxes(...)
34
+ print(instances.pred_masks) # a tensor of shape (N, H, W)
35
+ print('gt_masks' in instances)
36
+
37
+ 2. ``len(instances)`` returns the number of instances
38
+ 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields
39
+ and returns a new :class:`Instances`.
40
+ Typically, ``indices`` is an integer vector of indices,
41
+ or a binary mask of length ``num_instances``
42
+
43
+ .. code-block:: python
44
+
45
+ category_3_detections = instances[instances.pred_classes == 3]
46
+ confident_detections = instances[instances.scores > 0.9]
47
+ """
48
+
49
+ def __init__(self, image_size: Tuple[int, int], **kwargs: Any):
50
+ """
51
+ Args:
52
+ image_size (height, width): the spatial size of the image.
53
+ kwargs: fields to add to this `Instances`.
54
+ """
55
+ self._image_size = image_size
56
+ self._fields: Dict[str, Any] = {}
57
+ for k, v in kwargs.items():
58
+ self.set(k, v)
59
+
60
+ @property
61
+ def image_size(self) -> Tuple[int, int]:
62
+ """
63
+ Returns:
64
+ tuple: height, width
65
+ """
66
+ return self._image_size
67
+
68
+ def __setattr__(self, name: str, val: Any) -> None:
69
+ if name.startswith("_"):
70
+ super().__setattr__(name, val)
71
+ else:
72
+ self.set(name, val)
73
+
74
+ def __getattr__(self, name: str) -> Any:
75
+ if name == "_fields" or name not in self._fields:
76
+ raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
77
+ return self._fields[name]
78
+
79
+ def set(self, name: str, value: Any) -> None:
80
+ """
81
+ Set the field named `name` to `value`.
82
+ The length of `value` must be the number of instances,
83
+ and must agree with other existing fields in this object.
84
+ """
85
+ data_len = len(value)
86
+ if len(self._fields):
87
+ assert (
88
+ len(self) == data_len
89
+ ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self))
90
+ self._fields[name] = value
91
+
92
+ def has(self, name: str) -> bool:
93
+ """
94
+ Returns:
95
+ bool: whether the field called `name` exists.
96
+ """
97
+ return name in self._fields
98
+
99
+ def remove(self, name: str) -> None:
100
+ """
101
+ Remove the field called `name`.
102
+ """
103
+ del self._fields[name]
104
+
105
+ def get(self, name: str) -> Any:
106
+ """
107
+ Returns the field called `name`.
108
+ """
109
+ return self._fields[name]
110
+
111
+ def get_fields(self) -> Dict[str, Any]:
112
+ """
113
+ Returns:
114
+ dict: a dict which maps names (str) to data of the fields
115
+
116
+ Modifying the returned dict will modify this instance.
117
+ """
118
+ return self._fields
119
+
120
+ # Tensor-like methods
121
+ def cuda(self, *args: Any, **kwargs: Any) -> "Instances":
122
+ """
123
+ Returns:
124
+ Instances: all fields are called with a `cuda`, if the field has this method.
125
+ """
126
+ ret = Instances(self._image_size)
127
+ for k, v in self._fields.items():
128
+ if hasattr(v, "cuda"):
129
+ v = v.cuda(*args, **kwargs)
130
+ ret.set(k, v)
131
+ return ret
132
+
133
+ def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances":
134
+ """
135
+ Args:
136
+ item: an index-like object and will be used to index all the fields.
137
+
138
+ Returns:
139
+ If `item` is a string, return the data in the corresponding field.
140
+ Otherwise, returns an `Instances` where all fields are indexed by `item`.
141
+ """
142
+ if type(item) == int:
143
+ if item >= len(self) or item < -len(self):
144
+ raise IndexError("Instances index out of range!")
145
+ else:
146
+ item = slice(item, None, len(self))
147
+
148
+ ret = Instances(self._image_size)
149
+ for k, v in self._fields.items():
150
+ ret.set(k, v[item])
151
+ return ret
152
+
153
+ def __len__(self) -> int:
154
+ for v in self._fields.values():
155
+ # use __len__ because len() has to be int and is not friendly to tracing
156
+ return v.__len__()
157
+ raise NotImplementedError("Empty Instances does not support __len__!")
158
+
159
+ def __iter__(self):
160
+ raise NotImplementedError("`Instances` object is not iterable!")
161
+
162
+ @staticmethod
163
+ def cat(instance_lists: List["Instances"]) -> "Instances":
164
+ """
165
+ Args:
166
+ instance_lists (list[Instances])
167
+
168
+ Returns:
169
+ Instances
170
+ """
171
+ assert all(isinstance(i, Instances) for i in instance_lists)
172
+ assert len(instance_lists) > 0
173
+ if len(instance_lists) == 1:
174
+ return instance_lists[0]
175
+
176
+ image_size = instance_lists[0].image_size
177
+ if not isinstance(image_size, torch.Tensor): # could be a tensor in tracing
178
+ for i in instance_lists[1:]:
179
+ assert i.image_size == image_size
180
+ ret = Instances(image_size)
181
+ for k in instance_lists[0]._fields.keys():
182
+ values = [i.get(k) for i in instance_lists]
183
+ v0 = values[0]
184
+ if isinstance(v0, torch.Tensor):
185
+ values = torch.cat(values, dim=0)
186
+ elif isinstance(v0, list):
187
+ values = list(itertools.chain(*values))
188
+ elif hasattr(type(v0), "cat"):
189
+ values = type(v0).cat(values)
190
+ else:
191
+ raise ValueError("Unsupported type {} for concatenation".format(type(v0)))
192
+ ret.set(k, values)
193
+ return ret
194
+
195
+ def __str__(self) -> str:
196
+ s = self.__class__.__name__ + "("
197
+ s += "num_instances={}, ".format(len(self))
198
+ s += "image_height={}, ".format(self._image_size[0])
199
+ s += "image_width={}, ".format(self._image_size[1])
200
+ s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items())))
201
+ return s
202
+
203
+ __repr__ = __str__
204
+
205
+
206
+ class BitMasks:
207
+ """
208
+ This class stores the segmentation masks for all objects in one image, in
209
+ the form of bitmaps.
210
+
211
+ Attributes:
212
+ tensor: bool Tensor of N,H,W, representing N instances in the image.
213
+ """
214
+
215
+ def __init__(self, tensor: Union[torch.Tensor, np.ndarray]):
216
+ """
217
+ Args:
218
+ tensor: bool Tensor of N,H,W, representing N instances in the image.
219
+ """
220
+ device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu")
221
+ tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device)
222
+ assert tensor.dim() == 3, tensor.size()
223
+ self.image_size = tensor.shape[1:]
224
+ self.tensor = tensor
225
+
226
+ def to(self, *args: Any, **kwargs: Any) -> "BitMasks":
227
+ return BitMasks(self.tensor.to(*args, **kwargs))
228
+
229
+ @property
230
+ def device(self) -> torch.device:
231
+ return self.tensor.device
232
+
233
+ def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks":
234
+ """
235
+ Returns:
236
+ BitMasks: Create a new :class:`BitMasks` by indexing.
237
+
238
+ The following usage are allowed:
239
+
240
+ 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask.
241
+ 2. `new_masks = masks[2:10]`: return a slice of masks.
242
+ 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
243
+ with `length = len(masks)`. Nonzero elements in the vector will be selected.
244
+
245
+ Note that the returned object might share storage with this object,
246
+ subject to Pytorch's indexing semantics.
247
+ """
248
+ if isinstance(item, int):
249
+ return BitMasks(self.tensor[item].unsqueeze(0))
250
+ m = self.tensor[item]
251
+ assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format(
252
+ item, m.shape
253
+ )
254
+ return BitMasks(m)
255
+
256
+ def __iter__(self) -> torch.Tensor:
257
+ yield from self.tensor
258
+
259
+ def __repr__(self) -> str:
260
+ s = self.__class__.__name__ + "("
261
+ s += "num_instances={})".format(len(self.tensor))
262
+ return s
263
+
264
+ def __len__(self) -> int:
265
+ return self.tensor.shape[0]
266
+
267
+ def nonempty(self) -> torch.Tensor:
268
+ """
269
+ Find masks that are non-empty.
270
+
271
+ Returns:
272
+ Tensor: a BoolTensor which represents
273
+ whether each mask is empty (False) or non-empty (True).
274
+ """
275
+ return self.tensor.flatten(1).any(dim=1)
276
+
277
+ @staticmethod
278
+ def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks":
279
+ """
280
+ Concatenates a list of BitMasks into a single BitMasks
281
+
282
+ Arguments:
283
+ bitmasks_list (list[BitMasks])
284
+
285
+ Returns:
286
+ BitMasks: the concatenated BitMasks
287
+ """
288
+ assert isinstance(bitmasks_list, (list, tuple))
289
+ assert len(bitmasks_list) > 0
290
+ assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list)
291
+
292
+ cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0))
293
+ return cat_bitmasks
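For reference, a small hypothetical example of the Instances / BitMasks containers above (the field names gt_classes and gt_masks are illustrative; any per-instance data of matching length works):

import torch
from core.data.datasets.images.seg_dataset_dev import Instances, BitMasks

inst = Instances((480, 640))                          # image height, width
inst.gt_classes = torch.tensor([1, 1, 2, 3])          # one label per instance
inst.gt_masks = BitMasks(torch.zeros(4, 480, 640, dtype=torch.bool))
subset = inst[inst.gt_classes == 1]                   # boolean indexing slices every field
assert len(subset) == 2 and len(subset.gt_masks) == 2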
core/data/datasets/images/smpl_data_tools/__pycache__/_smpl.cpython-312.pyc ADDED
Binary file (22.5 kB).
 
core/data/datasets/images/smpl_data_tools/__pycache__/config_smpl.cpython-312.pyc ADDED
Binary file (2.35 kB).
 
core/data/datasets/images/smpl_data_tools/__pycache__/image_ops.cpython-312.pyc ADDED
Binary file (12.1 kB).
 
core/data/datasets/images/smpl_data_tools/__pycache__/tsv_file.cpython-312.pyc ADDED
Binary file (11.4 kB).
 
core/data/datasets/images/smpl_data_tools/_smpl.py ADDED
@@ -0,0 +1,333 @@
1
+ """
2
+ This file contains the definition of the SMPL model
3
+
4
+ It is adapted from opensource project GraphCMR (https://github.com/nkolot/GraphCMR/)
5
+ """
6
+ from __future__ import division
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import numpy as np
11
+ import scipy.sparse
12
+ import pickle
13
+
14
+ from . import config_smpl as cfg
15
+
16
+ def rodrigues(theta):
17
+ """Convert axis-angle representation to rotation matrix.
18
+ Args:
19
+ theta: size = [B, 3]
20
+ Returns:
21
+ Rotation matrix corresponding to the axis-angle input -- size = [B, 3, 3]
22
+ """
23
+ l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1)
24
+ angle = torch.unsqueeze(l1norm, -1)
25
+ normalized = torch.div(theta, angle)
26
+ angle = angle * 0.5
27
+ v_cos = torch.cos(angle)
28
+ v_sin = torch.sin(angle)
29
+ quat = torch.cat([v_cos, v_sin * normalized], dim = 1)
30
+ return quat2mat(quat)
31
+
32
+ def quat2mat(quat):
33
+ """Convert quaternion coefficients to rotation matrix.
34
+ Args:
35
+ quat: size = [B, 4] 4 <===>(w, x, y, z)
36
+ Returns:
37
+ Rotation matrix corresponding to the quaternion -- size = [B, 3, 3]
38
+ """
39
+ norm_quat = quat
40
+ norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True)
41
+ w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3]
42
+
43
+ B = quat.size(0)
44
+
45
+ w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2)
46
+ wx, wy, wz = w*x, w*y, w*z
47
+ xy, xz, yz = x*y, x*z, y*z
48
+
49
+ rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz,
50
+ 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx,
51
+ 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3)
52
+ return rotMat
53
+
54
+ def orthographic_projection(X, camera):
55
+ """Perform orthographic projection of 3D points X using the camera parameters
56
+ Args:
57
+ X: size = [B, N, 3]
58
+ camera: size = [B, 3]
59
+ Returns:
60
+ Projected 2D points -- size = [B, N, 2]
61
+ """
62
+ camera = camera.view(-1, 1, 3)
63
+ X_trans = X[:, :, :2] + camera[:, :, 1:]
64
+ shape = X_trans.shape
65
+ X_2d = (camera[:, :, 0] * X_trans.view(shape[0], -1)).view(shape)
66
+ return X_2d
67
+
68
+ class SMPL(nn.Module):
69
+
70
+ def __init__(self, gender='neutral'):
71
+ super(SMPL, self).__init__()
72
+
73
+ if gender=='m':
74
+ model_file=cfg.SMPL_Male
75
+ elif gender=='f':
76
+ model_file=cfg.SMPL_Female
77
+ else:
78
+ model_file=cfg.SMPL_FILE
79
+
80
+ smpl_model = pickle.load(open(model_file, 'rb'), encoding='latin1')
81
+ J_regressor = smpl_model['J_regressor'].tocoo()
82
+ row = J_regressor.row
83
+ col = J_regressor.col
84
+ data = J_regressor.data
85
+ i = torch.LongTensor([row, col])
86
+ v = torch.FloatTensor(data)
87
+ J_regressor_shape = [24, 6890]
88
+ self.register_buffer('J_regressor', torch.sparse.FloatTensor(i, v, J_regressor_shape).to_dense()) # 24*6890
89
+ self.register_buffer('weights', torch.FloatTensor(smpl_model['weights'])) # 6890 * 24
90
+ self.register_buffer('posedirs', torch.FloatTensor(smpl_model['posedirs'])) # 6890*3*207
91
+ self.register_buffer('v_template', torch.FloatTensor(smpl_model['v_template'])) # 6890*3
92
+ self.register_buffer('shapedirs', torch.FloatTensor(np.array(smpl_model['shapedirs']))) # # 6890*3*10
93
+ self.register_buffer('faces', torch.from_numpy(smpl_model['f'].astype(np.int64))) # 13776 * 3
94
+ self.register_buffer('kintree_table', torch.from_numpy(smpl_model['kintree_table'].astype(np.int64))) # 2*24
95
+ id_to_col = {self.kintree_table[1, i].item(): i for i in range(self.kintree_table.shape[1])}
96
+ self.register_buffer('parent', torch.LongTensor([id_to_col[self.kintree_table[0, it].item()] for it in range(1, self.kintree_table.shape[1])]))
97
+
98
+ self.pose_shape = [24, 3]
99
+ self.beta_shape = [10]
100
+ self.translation_shape = [3]
101
+
102
+ self.pose = torch.zeros(self.pose_shape)
103
+ self.beta = torch.zeros(self.beta_shape)
104
+ self.translation = torch.zeros(self.translation_shape)
105
+
106
+ self.verts = None
107
+ self.J = None
108
+ self.R = None
109
+
110
+ J_regressor_extra = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_TRAIN_EXTRA)).float() # 14*6890
111
+ self.register_buffer('J_regressor_extra', J_regressor_extra)
112
+ self.joints_idx = cfg.JOINTS_IDX
113
+
114
+ J_regressor_h36m_correct = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M_correct)).float() # 17*6890
115
+ self.register_buffer('J_regressor_h36m_correct', J_regressor_h36m_correct)
116
+
117
+
118
+ def forward(self, pose, beta):
119
+ device = pose.device
120
+ batch_size = pose.shape[0]
121
+ v_template = self.v_template[None, :]
122
+ shapedirs = self.shapedirs.view(-1,10)[None, :].expand(batch_size, -1, -1)
123
+ beta = beta[:, :, None]
124
+ # print(f'pose device {pose.device} beta device {beta.device} smpl parameter device {shapedirs.device}')
125
+ v_shaped = torch.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template
126
+ # batched sparse matmul not supported in pytorch
127
+ J = []
128
+ for i in range(batch_size):
129
+ J.append(torch.matmul(self.J_regressor, v_shaped[i]))
130
+ J = torch.stack(J, dim=0)
131
+ # input is rotation matrices: (bs, 24, 3, 3)
132
+ if pose.ndimension() == 4:
133
+ R = pose
134
+ # input is axis-angle: (bs, 72)
135
+ elif pose.ndimension() == 2:
136
+ pose_cube = pose.view(-1, 3) # (batch_size * 24, 3)
137
+ R = rodrigues(pose_cube).view(batch_size, 24, 3, 3)
138
+ R = R.view(batch_size, 24, 3, 3)
139
+ I_cube = torch.eye(3)[None, None, :].to(device)
140
+
141
+ lrotmin = (R[:,1:,:] - I_cube).view(batch_size, -1)
142
+ posedirs = self.posedirs.view(-1,207)[None, :].expand(batch_size, -1, -1)
143
+ v_posed = v_shaped + torch.matmul(posedirs, lrotmin[:, :, None]).view(-1, 6890, 3)
144
+ J_ = J.clone()
145
+ J_[:, 1:, :] = J[:, 1:, :] - J[:, self.parent, :]
146
+ G_ = torch.cat([R, J_[:, :, :, None]], dim=-1)
147
+ pad_row = torch.FloatTensor([0,0,0,1]).to(device).view(1,1,1,4).expand(batch_size, 24, -1, -1)
148
+ G_ = torch.cat([G_, pad_row], dim=2)
149
+ G = [G_[:, 0].clone()]
150
+ for i in range(1, 24):
151
+ G.append(torch.matmul(G[self.parent[i-1]], G_[:, i, :, :]))
152
+ G = torch.stack(G, dim=1)
153
+
154
+ rest = torch.cat([J, torch.zeros(batch_size, 24, 1).to(device)], dim=2).view(batch_size, 24, 4, 1)
155
+ zeros = torch.zeros(batch_size, 24, 4, 3).to(device)
156
+ rest = torch.cat([zeros, rest], dim=-1)
157
+ rest = torch.matmul(G, rest)
158
+ G = G - rest
159
+ T = torch.matmul(self.weights, G.permute(1,0,2,3).contiguous().view(24,-1)).view(6890, batch_size, 4, 4).transpose(0,1)
160
+ rest_shape_h = torch.cat([v_posed, torch.ones_like(v_posed)[:, :, [0]]], dim=-1)
161
+ v = torch.matmul(T, rest_shape_h[:, :, :, None])[:, :, :3, 0]
162
+ return v
163
+
164
+ def get_joints(self, vertices):
165
+ """
166
+ This method is used to get the joint locations from the SMPL mesh
167
+ Input:
168
+ vertices: size = (B, 6890, 3)
169
+ Output:
170
+ 3D joints: size = (B, 24, 3) after the JOINTS_IDX selection
171
+ """
172
+ joints = torch.einsum('bik,ji->bjk', [vertices, self.J_regressor])
173
+ joints_extra = torch.einsum('bik,ji->bjk', [vertices, self.J_regressor_extra])
174
+ joints = torch.cat((joints, joints_extra), dim=1)
175
+ joints = joints[:, cfg.JOINTS_IDX]
176
+ return joints
177
+
178
+ def get_h36m_joints(self, vertices):
179
+ """
180
+ This method is used to get the joint locations from the SMPL mesh
181
+ Input:
182
+ vertices: size = (B, 6890, 3)
183
+ Output:
184
+ 3D joints: size = (B, 17, 3)
185
+ """
186
+ joints = torch.einsum('bik,ji->bjk', [vertices, self.J_regressor_h36m_correct])
187
+ return joints
188
+
189
+ class SparseMM(torch.autograd.Function):
190
+ """Redefine sparse @ dense matrix multiplication to enable backpropagation.
191
+ The builtin matrix multiplication operation does not support backpropagation in some cases.
192
+ """
193
+ @staticmethod
194
+ def forward(ctx, sparse, dense):
195
+ ctx.req_grad = dense.requires_grad
196
+ ctx.save_for_backward(sparse)
197
+ return torch.matmul(sparse, dense)
198
+
199
+ @staticmethod
200
+ def backward(ctx, grad_output):
201
+ grad_input = None
202
+ sparse, = ctx.saved_tensors
203
+ if ctx.req_grad:
204
+ grad_input = torch.matmul(sparse.t(), grad_output)
205
+ return None, grad_input
206
+
207
+ def spmm(sparse, dense):
208
+ return SparseMM.apply(sparse, dense)
209
+
210
+
211
+ def scipy_to_pytorch(A, U, D):
212
+ """Convert scipy sparse matrices to pytorch sparse matrix."""
213
+ ptU = []
214
+ ptD = []
215
+
216
+ for i in range(len(U)):
217
+ u = scipy.sparse.coo_matrix(U[i])
218
+ i = torch.LongTensor(np.array([u.row, u.col]))
219
+ v = torch.FloatTensor(u.data)
220
+ # return index value and shape instead of a sparse tensor to avoid bug in multi-worker
221
+ ptU.append([i, v, u.shape])
222
+ for i in range(len(D)):
223
+ d = scipy.sparse.coo_matrix(D[i])
224
+ i = torch.LongTensor(np.array([d.row, d.col]))
225
+ v = torch.FloatTensor(d.data)
226
+ # return index value and shape instead of a sparse tensor to avoid bug in multi-worker
227
+ ptD.append([i, v, d.shape])
228
+
229
+ return ptU, ptD
230
+
231
+
232
+ def adjmat_sparse(adjmat, nsize=1):
233
+ """Create row-normalized sparse graph adjacency matrix."""
234
+ adjmat = scipy.sparse.csr_matrix(adjmat)
235
+ if nsize > 1:
236
+ orig_adjmat = adjmat.copy()
237
+ for _ in range(1, nsize):
238
+ adjmat = adjmat * orig_adjmat
239
+ adjmat.data = np.ones_like(adjmat.data)
240
+ for i in range(adjmat.shape[0]):
241
+ adjmat[i,i] = 1
242
+ num_neighbors = np.array(1 / adjmat.sum(axis=-1))
243
+ adjmat = adjmat.multiply(num_neighbors)
244
+ adjmat = scipy.sparse.coo_matrix(adjmat)
245
+ row = adjmat.row
246
+ col = adjmat.col
247
+ data = adjmat.data
248
+ i = torch.LongTensor(np.array([row, col]))
249
+ v = torch.from_numpy(data).float()
250
+ # adjmat = torch.sparse.FloatTensor(i, v, adjmat.shape)
251
+ # return index value and shape instead of a sparse tensor to avoid bug in multi-worker
252
+
253
+
254
+ return [i, v, adjmat.shape]
255
+
256
+ def get_graph_params(filename, nsize=1):
257
+ """Load and process graph adjacency matrix and upsampling/downsampling matrices."""
258
+ data = np.load(filename, encoding='latin1', allow_pickle=True)
259
+ A = data['A']
260
+ U = data['U']
261
+ D = data['D']
262
+ U, D = scipy_to_pytorch(A, U, D)
263
+ A = [adjmat_sparse(a, nsize=nsize) for a in A]
264
+ return A, U, D
265
+
266
+
267
+ class Mesh(object):
268
+ """Mesh object that is used for handling certain graph operations."""
269
+ def __init__(self, filename=cfg.SMPL_sampling_matrix,
270
+ num_downsampling=1, nsize=1, device=torch.device('cuda')):
271
+ super(Mesh, self).__init__()
272
+
273
+ self.device = device
274
+ self._A, self._U, self._D = get_graph_params(filename=filename, nsize=nsize)
275
+ # self._A = [a.to(device) for a in self._A]
276
+ # self._U = [u.to(device) for u in self._U]
277
+ # self._D = [d.to(device) for d in self._D]
278
+
279
+ self.num_downsampling = num_downsampling
280
+
281
+ # load template vertices from SMPL and normalize them
282
+ smpl = SMPL()
283
+ ref_vertices = smpl.v_template
284
+ center = 0.5*(ref_vertices.max(dim=0)[0] + ref_vertices.min(dim=0)[0])[None]
285
+ ref_vertices -= center
286
+ ref_vertices /= ref_vertices.abs().max().item()
287
+
288
+ self._ref_vertices = ref_vertices.to(device)
289
+ self.faces = smpl.faces.int().to(device)
290
+ self.sparse = False
291
+
292
+ @property
293
+ def ref_vertices(self):
294
+ """Return the template vertices at the specified subsampling level."""
295
+ _D = [torch.sparse.FloatTensor(item[0], item[1], item[2]).to(self.device) for item in self._D]
296
+ ref_vertices = self._ref_vertices
297
+ for i in range(self.num_downsampling):
298
+ ref_vertices = torch.spmm(_D[i], ref_vertices)
299
+ return ref_vertices
300
+
301
+ def downsample(self, x, n1=0, n2=None):
302
+ _D = [torch.sparse.FloatTensor(item[0], item[1], item[2]).to(self.device) for item in self._D]
303
+ """Downsample mesh."""
304
+ if n2 is None:
305
+ n2 = self.num_downsampling
306
+ if x.ndimension() < 3:
307
+ for i in range(n1, n2):
308
+ x = spmm(_D[i], x)
309
+ elif x.ndimension() == 3:
310
+ out = []
311
+ for i in range(x.shape[0]):
312
+ y = x[i]
313
+ for j in range(n1, n2):
314
+ y = spmm(_D[j], y)
315
+ out.append(y)
316
+ x = torch.stack(out, dim=0)
317
+ return x
318
+
319
+ def upsample(self, x, n1=1, n2=0):
320
+ _U = [torch.sparse.FloatTensor(item[0], item[1], item[2]).to(self.device) for item in self._U]
321
+ """Upsample mesh."""
322
+ if x.ndimension() < 3:
323
+ for i in reversed(range(n2, n1)):
324
+ x = spmm(_U[i], x)
325
+ elif x.ndimension() == 3:
326
+ out = []
327
+ for i in range(x.shape[0]):
328
+ y = x[i]
329
+ for j in reversed(range(n2, n1)):
330
+ y = spmm(_U[j], y)
331
+ out.append(y)
332
+ x = torch.stack(out, dim=0)
333
+ return x
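A hypothetical sanity check of the SMPL module above; it assumes the model files referenced in config_smpl (e.g. basicModel_neutral_lbs_10_207_0_v1.0.0.pkl) are present on disk:

import torch
from core.data.datasets.images.smpl_data_tools._smpl import SMPL

smpl = SMPL()                          # neutral model
pose = torch.zeros(2, 72)              # axis-angle pose, batch of 2 (zero pose = T-pose)
beta = torch.zeros(2, 10)              # shape coefficients
verts = smpl(pose, beta)               # (2, 6890, 3) mesh vertices
joints = smpl.get_joints(verts)        # (2, 24, 3) joints after the JOINTS_IDX selection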
core/data/datasets/images/smpl_data_tools/config_smpl.py ADDED
@@ -0,0 +1,53 @@
1
+ """
2
+ This file contains definitions of useful data structures and the paths
3
+ for the datasets and data files necessary to run the code.
4
+
5
+ Adapted from opensource project GraphCMR (https://github.com/nkolot/GraphCMR/) and Pose2Mesh (https://github.com/hongsukchoi/Pose2Mesh_RELEASE)
6
+
7
+ """
8
+
9
+ from os.path import join,split,abspath
10
+ # from os import getcwd
11
+ import sys
12
+
13
+ dirname, filename = split(abspath(sys.argv[0]))
14
+
15
+ folder_path = join(dirname,'core/data/datasets/images/smpl_data_tools/smpl_modeling/')
16
+ # print("current path {} ".format(folder_path))
17
+ JOINT_REGRESSOR_TRAIN_EXTRA = folder_path + 'data/J_regressor_extra.npy'
18
+ JOINT_REGRESSOR_H36M_correct = folder_path + 'data/J_regressor_h36m_correct.npy'
19
+ SMPL_FILE = folder_path + 'data/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl'
20
+ SMPL_Male = folder_path + 'data/basicModel_m_lbs_10_207_0_v1.0.0.pkl'
21
+ SMPL_Female = folder_path + 'data/basicModel_f_lbs_10_207_0_v1.0.0.pkl'
22
+ SMPL_sampling_matrix = folder_path + 'data/mesh_downsampling.npz'
23
+ MANO_FILE = folder_path + 'data/MANO_RIGHT.pkl'
24
+ MANO_sampling_matrix = folder_path + 'data/mano_downsampling.npz'
25
+
26
+ JOINTS_IDX = [8, 5, 29, 30, 4, 7, 21, 19, 17, 16, 18, 20, 31, 32, 33, 34, 35, 36, 37, 24, 26, 25, 28, 27]
27
+
28
+
29
+ """
30
+ We follow the body joint definition, loss functions, and evaluation metrics from
31
+ open source project GraphCMR (https://github.com/nkolot/GraphCMR/)
32
+
33
+ Each dataset uses different sets of joints.
34
+ We use a superset of 24 joints such that we include all joints from every dataset.
35
+ If a dataset doesn't provide annotations for a specific joint, we simply ignore it.
36
+ The joints used here are:
37
+ """
38
+ J24_NAME = ('R_Ankle', 'R_Knee', 'R_Hip', 'L_Hip', 'L_Knee', 'L_Ankle', 'R_Wrist', 'R_Elbow', 'R_Shoulder', 'L_Shoulder',
39
+ 'L_Elbow','L_Wrist','Neck','Top_of_Head','Pelvis','Thorax','Spine','Jaw','Head','Nose','L_Eye','R_Eye','L_Ear','R_Ear')
40
+ H36M_J17_NAME = ( 'Pelvis', 'R_Hip', 'R_Knee', 'R_Ankle', 'L_Hip', 'L_Knee', 'L_Ankle', 'Torso', 'Neck', 'Nose', 'Head',
41
+ 'L_Shoulder', 'L_Elbow', 'L_Wrist', 'R_Shoulder', 'R_Elbow', 'R_Wrist')
42
+ J24_TO_J14 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18]
43
+ H36M_J17_TO_J14 = [3, 2, 1, 4, 5, 6, 16, 15, 14, 11, 12, 13, 8, 10]
44
+
45
+ """
46
+ We follow the hand joint definition and mesh topology from
47
+ open source project Manopth (https://github.com/hassony2/manopth)
48
+
49
+ The hand joints used here are:
50
+ """
51
+ J_NAME = ('Wrist', 'Thumb_1', 'Thumb_2', 'Thumb_3', 'Thumb_4', 'Index_1', 'Index_2', 'Index_3', 'Index_4', 'Middle_1',
52
+ 'Middle_2', 'Middle_3', 'Middle_4', 'Ring_1', 'Ring_2', 'Ring_3', 'Ring_4', 'Pinky_1', 'Pinky_2', 'Pinky_3', 'Pinky_4')
53
+ ROOT_INDEX = 0
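As a small illustration (not part of the repo) of how the index lists above are meant to be used, mapping a joint array in the J24 convention down to the 14-joint subset:

import numpy as np
from core.data.datasets.images.smpl_data_tools import config_smpl as cfg

joints_j24 = np.zeros((24, 3))               # hypothetical joints in the J24 ordering
joints_j14 = joints_j24[cfg.J24_TO_J14]      # shape (14, 3), the reduced joint set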
core/data/datasets/images/smpl_data_tools/image_ops.py ADDED
@@ -0,0 +1,230 @@
1
+ # ----------------------------------------------------------------------------------------------
2
+ # METRO (https://github.com/microsoft/MeshTransformer)
3
+ # Copyright (c) Microsoft Corporation. All Rights Reserved [see https://github.com/microsoft/MeshTransformer/blob/main/LICENSE for details]
4
+ # Licensed under the MIT license.
5
+ # ----------------------------------------------------------------------------------------------
6
+ """
7
+ Image processing tools
8
+ Modified from open source projects:
9
+ (https://github.com/nkolot/GraphCMR/)
10
+ (https://github.com/open-mmlab/mmdetection)
11
+ """
12
+
13
+ import numpy as np
14
+ import base64
15
+ import cv2
16
+ import torch
17
+ import scipy.misc
18
+
19
+ def img_from_base64(imagestring):
20
+ try:
21
+ jpgbytestring = base64.b64decode(imagestring)
22
+ nparr = np.frombuffer(jpgbytestring, np.uint8)
23
+ r = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
24
+ return r
25
+ except ValueError:
26
+ return None
27
+
28
+ def myimrotate(img, angle, center=None, scale=1.0, border_value=0, auto_bound=False):
29
+ if center is not None and auto_bound:
30
+ raise ValueError('`auto_bound` conflicts with `center`')
31
+ try:
32
+ h, w = img.shape[:2]
33
+ except:
34
+ h, w = img.size[:2]
35
+ if center is None:
36
+ center = ((w - 1) * 0.5, (h - 1) * 0.5)
37
+ assert isinstance(center, tuple)
38
+
39
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
40
+ if auto_bound:
41
+ cos = np.abs(matrix[0, 0])
42
+ sin = np.abs(matrix[0, 1])
43
+ new_w = h * sin + w * cos
44
+ new_h = h * cos + w * sin
45
+ matrix[0, 2] += (new_w - w) * 0.5
46
+ matrix[1, 2] += (new_h - h) * 0.5
47
+ w = int(np.round(new_w))
48
+ h = int(np.round(new_h))
49
+ rotated = cv2.warpAffine(img, matrix, (w, h), borderValue=border_value)
50
+ return rotated
51
+
52
+ def myimresize(img, size, return_scale=False, interpolation='bilinear'):
53
+
54
+ try:
55
+ h, w = img.shape[:2]
56
+ except:
57
+ h, w = img.size[:2]
58
+ resized_img = cv2.resize(
59
+ img, (size[0],size[1]), interpolation=cv2.INTER_LINEAR)
60
+ if not return_scale:
61
+ return resized_img
62
+ else:
63
+ w_scale = size[0] / w
64
+ h_scale = size[1] / h
65
+ return resized_img, w_scale, h_scale
66
+
67
+
68
+ def get_transform(center, scale, res, rot=0):
69
+ """Generate transformation matrix."""
70
+ h = 200 * scale
71
+ t = np.zeros((3, 3))
72
+ t[0, 0] = float(res[1]) / h
73
+ t[1, 1] = float(res[0]) / h
74
+ t[0, 2] = res[1] * (-float(center[0]) / h + .5)
75
+ t[1, 2] = res[0] * (-float(center[1]) / h + .5)
76
+ t[2, 2] = 1
77
+ if not rot == 0:
78
+ rot = -rot # To match direction of rotation from cropping
79
+ rot_mat = np.zeros((3,3))
80
+ rot_rad = rot * np.pi / 180
81
+ sn,cs = np.sin(rot_rad), np.cos(rot_rad)
82
+ rot_mat[0,:2] = [cs, -sn]
83
+ rot_mat[1,:2] = [sn, cs]
84
+ rot_mat[2,2] = 1
85
+ # Need to rotate around center
86
+ t_mat = np.eye(3)
87
+ t_mat[0,2] = -res[1]/2
88
+ t_mat[1,2] = -res[0]/2
89
+ t_inv = t_mat.copy()
90
+ t_inv[:2,2] *= -1
91
+ t = np.dot(t_inv,np.dot(rot_mat,np.dot(t_mat,t)))
92
+ return t
93
+
94
+ def transform(pt, center, scale, res, invert=0, rot=0):
95
+ """Transform pixel location to different reference."""
96
+ t = get_transform(center, scale, res, rot=rot)
97
+ if invert:
98
+ # t = np.linalg.inv(t)
99
+ t_torch = torch.from_numpy(t)
100
+ t_torch = torch.inverse(t_torch)
101
+ t = t_torch.numpy()
102
+ new_pt = np.array([pt[0]-1, pt[1]-1, 1.]).T
103
+ new_pt = np.dot(t, new_pt)
104
+ return new_pt[:2].astype(int)+1
105
+
106
+ def crop(img, center, scale, res, rot=0):
107
+ """Crop image according to the supplied bounding box."""
108
+ # Upper left point
109
+ ul = np.array(transform([1, 1], center, scale, res, invert=1))-1
110
+ # Bottom right point
111
+ br = np.array(transform([res[0]+1,
112
+ res[1]+1], center, scale, res, invert=1))-1
113
+ # Padding so that when rotated proper amount of context is included
114
+ pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
115
+ if not rot == 0:
116
+ ul -= pad
117
+ br += pad
118
+ new_shape = [br[1] - ul[1], br[0] - ul[0]]
119
+ try:
120
+ image_shape = img.shape
121
+ except:
122
+ image_shape = img.size
123
+ if len(image_shape) > 2:
124
+ new_shape += [image_shape[2]]
125
+ new_img = np.zeros(new_shape)
126
+
127
+ # Range to fill new array
128
+ new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
129
+ new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
130
+ # Range to sample from original image
131
+ old_x = max(0, ul[0]), min(len(img[0]), br[0])
132
+ old_y = max(0, ul[1]), min(len(img), br[1])
133
+
134
+ new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
135
+ old_x[0]:old_x[1]]
136
+ if not rot == 0:
137
+ # Remove padding
138
+ # new_img = scipy.misc.imrotate(new_img, rot)
139
+ new_img = myimrotate(new_img, rot)
140
+ new_img = new_img[pad:-pad, pad:-pad]
141
+
142
+ # new_img = scipy.misc.imresize(new_img, res)
143
+ new_img = myimresize(new_img, [res[0], res[1]])
144
+ return new_img
145
+
146
+ def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True):
147
+ """'Undo' the image cropping/resizing.
148
+ This function is used when evaluating mask/part segmentation.
149
+ """
150
+ try:
151
+ res = img.shape[:2]
152
+ except:
153
+ res = img.size[:2]
154
+ # Upper left point
155
+ ul = np.array(transform([1, 1], center, scale, res, invert=1))-1
156
+ # Bottom right point
157
+ br = np.array(transform([res[0]+1,res[1]+1], center, scale, res, invert=1))-1
158
+ # size of cropped image
159
+ crop_shape = [br[1] - ul[1], br[0] - ul[0]]
160
+
161
+ new_shape = [br[1] - ul[1], br[0] - ul[0]]
162
+
163
+ try:
164
+ image_shape = img.shape
165
+ except:
166
+ image_shape = img.size
167
+
168
+ if len(image_shape) > 2:
169
+ new_shape += [image_shape[2]]
170
+ new_img = np.zeros(orig_shape, dtype=np.uint8)
171
+ # Range to fill new array
172
+ new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0]
173
+ new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1]
174
+ # Range to sample from original image
175
+ old_x = max(0, ul[0]), min(orig_shape[1], br[0])
176
+ old_y = max(0, ul[1]), min(orig_shape[0], br[1])
177
+ # img = scipy.misc.imresize(img, crop_shape, interp='nearest')
178
+ img = myimresize(img, [crop_shape[0],crop_shape[1]])
179
+ new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = img[new_y[0]:new_y[1], new_x[0]:new_x[1]]
180
+ return new_img
181
+
182
+ def rot_aa(aa, rot):
183
+ """Rotate axis angle parameters."""
184
+ # pose parameters
185
+ R = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
186
+ [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
187
+ [0, 0, 1]])
188
+ # find the rotation of the body in camera frame
189
+ per_rdg, _ = cv2.Rodrigues(aa)
190
+ # apply the global rotation to the global orientation
191
+ resrot, _ = cv2.Rodrigues(np.dot(R,per_rdg))
192
+ aa = (resrot.T)[0]
193
+ return aa
194
+
195
+ def flip_img(img):
196
+ """Flip rgb images or masks.
197
+ channels come last, e.g. (256,256,3).
198
+ """
199
+ img = np.fliplr(img)
200
+ return img
201
+
202
+ def flip_kp(kp):
203
+ """Flip keypoints."""
204
+ flipped_parts = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21, 20, 23, 22]
205
+ kp = kp[flipped_parts]
206
+ kp[:,0] = - kp[:,0]
207
+ return kp
208
+
209
+ def flip_pose(pose):
210
+ """Flip pose.
211
+ The flipping is based on SMPL parameters.
212
+ """
213
+ flippedParts = [0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13,
214
+ 14 ,18, 19, 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33,
215
+ 34, 35, 30, 31, 32, 36, 37, 38, 42, 43, 44, 39, 40, 41,
216
+ 45, 46, 47, 51, 52, 53, 48, 49, 50, 57, 58, 59, 54, 55,
217
+ 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 67, 68]
218
+ pose = pose[flippedParts]
219
+ # we also negate the second and the third dimension of the axis-angle
220
+ pose[1::3] = -pose[1::3]
221
+ pose[2::3] = -pose[2::3]
222
+ return pose
223
+
224
+ def flip_aa(aa):
225
+ """Flip axis-angle representation.
226
+ We negate the second and the third dimension of the axis-angle.
227
+ """
228
+ aa[1] = -aa[1]
229
+ aa[2] = -aa[2]
230
+ return aa
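A hypothetical use of the crop helpers above, cutting a fixed-resolution patch around a made-up person center and mapping one keypoint into crop coordinates (all values below are illustrative):

import numpy as np
from core.data.datasets.images.smpl_data_tools.image_ops import crop, transform

img = np.zeros((480, 640, 3), dtype=np.uint8)             # fake input image
center, scale, res = np.array([320, 240]), 1.2, (224, 224)
patch = crop(img, center, scale, res)                     # (224, 224, 3) patch around center
kp_in_crop = transform([320, 240], center, scale, res)    # that pixel in crop coordinates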
core/data/datasets/images/smpl_data_tools/smpl_modeling/data/J_regressor_extra.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40dfaa71fcc7eed6966a6ed046311b7e8ea0eb9a5172b298e3df6fc4b6ec0eb0
3
+ size 771808