mingyuan committed on
Commit 373af33 · 1 Parent(s): a73d3a6

initial commit

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitignore +145 -0
  2. .readthedocs.yaml +13 -0
  3. app.py +202 -0
  4. configs/lmm/lmm.py +75 -0
  5. configs/lmm/lmm_small_demo.py +22 -0
  6. examples/angry.m4a +0 -0
  7. examples/placeholder.m4a +0 -0
  8. examples/surprise.m4a +0 -0
  9. mogen/__init__.py +56 -0
  10. mogen/apis/__init__.py +8 -0
  11. mogen/apis/test.py +158 -0
  12. mogen/apis/train.py +161 -0
  13. mogen/core/__init__.py +0 -0
  14. mogen/core/distributed_wrapper.py +135 -0
  15. mogen/core/optimizer/__init__.py +3 -0
  16. mogen/core/optimizer/builder.py +52 -0
  17. mogen/datasets/__init__.py +12 -0
  18. mogen/datasets/base_dataset.py +183 -0
  19. mogen/datasets/builder.py +149 -0
  20. mogen/datasets/dataset_wrappers.py +42 -0
  21. mogen/datasets/human_body_prior/__init__.py +22 -0
  22. mogen/datasets/human_body_prior/body_model/__init__.py +22 -0
  23. mogen/datasets/human_body_prior/body_model/body_model.py +281 -0
  24. mogen/datasets/human_body_prior/body_model/lbs.py +404 -0
  25. mogen/datasets/human_body_prior/body_model/parts_segm/readme +1 -0
  26. mogen/datasets/human_body_prior/body_model/rigid_object_model.py +67 -0
  27. mogen/datasets/human_body_prior/models/__init__.py +22 -0
  28. mogen/datasets/human_body_prior/models/ik_engine.py +287 -0
  29. mogen/datasets/human_body_prior/models/model_components.py +41 -0
  30. mogen/datasets/human_body_prior/models/vposer_model.py +133 -0
  31. mogen/datasets/human_body_prior/tools/__init__.py +22 -0
  32. mogen/datasets/human_body_prior/tools/angle_continuous_repres.py +80 -0
  33. mogen/datasets/human_body_prior/tools/configurations.py +47 -0
  34. mogen/datasets/human_body_prior/tools/model_loader.py +87 -0
  35. mogen/datasets/human_body_prior/tools/omni_tools.py +163 -0
  36. mogen/datasets/human_body_prior/tools/rotation_tools.py +151 -0
  37. mogen/datasets/human_body_prior/tools/tgm_conversion.py +527 -0
  38. mogen/datasets/human_body_prior/train/README.md +41 -0
  39. mogen/datasets/human_body_prior/train/V02_05/V02_05.py +54 -0
  40. mogen/datasets/human_body_prior/train/V02_05/V02_05.yaml +84 -0
  41. mogen/datasets/human_body_prior/train/V02_05/__init__.py +22 -0
  42. mogen/datasets/human_body_prior/train/__init__.py +22 -0
  43. mogen/datasets/human_body_prior/train/vposer_trainer.py +337 -0
  44. mogen/datasets/human_body_prior/visualizations/__init__.py +22 -0
  45. mogen/datasets/human_body_prior/visualizations/training_visualization.py +123 -0
  46. mogen/datasets/motionverse_dataset.py +828 -0
  47. mogen/datasets/paramUtil.py +140 -0
  48. mogen/datasets/pipelines/__init__.py +30 -0
  49. mogen/datasets/pipelines/compose.py +42 -0
  50. mogen/datasets/pipelines/formatting.py +135 -0
.gitignore ADDED
@@ -0,0 +1,145 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ **/*.pyc
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ .hypothesis/
49
+ .pytest_cache/
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+ local_settings.py
58
+ db.sqlite3
59
+
60
+ # Flask stuff:
61
+ instance/
62
+ .webassets-cache
63
+
64
+ # Scrapy stuff:
65
+ .scrapy
66
+
67
+ # Sphinx documentation
68
+ docs/en/build
69
+
70
+ # PyBuilder
71
+ target/
72
+
73
+ # Jupyter Notebook
74
+ .ipynb_checkpoints
75
+
76
+ # pyenv
77
+ .python-version
78
+
79
+ # celery beat schedule file
80
+ celerybeat-schedule
81
+
82
+ # SageMath parsed files
83
+ *.sage.py
84
+
85
+ # Environments
86
+ .env
87
+ .venv
88
+ env/
89
+ venv/
90
+ ENV/
91
+ env.bak/
92
+ venv.bak/
93
+
94
+ # Spyder project settings
95
+ .spyderproject
96
+ .spyproject
97
+
98
+ # Rope project settings
99
+ .ropeproject
100
+
101
+ # mkdocs documentation
102
+ /site
103
+
104
+ # mypy
105
+ .mypy_cache/
106
+
107
+ # custom
108
+ data
109
+ !mmhuman3d/data
110
+ # data for pytest moved to http server
111
+ # !tests/data
112
+ .vscode
113
+ .idea
114
+ *.pkl
115
+ *.pkl.json
116
+ *.log.json
117
+ work_dirs/
118
+ logs/
119
+
120
+ # Pytorch
121
+ *.pth
122
+ *.pt
123
+
124
+
125
+ # Visualization
126
+ *.mp4
127
+ *.png
128
+ *.gif
129
+ *.jpg
130
+ *.obj
131
+ *.ply
132
+ !demo/resources/*
133
+
134
+ # Resources as exception
135
+ !resources/*
136
+
137
+ # Loaded/Saved data files
138
+ *.npz
139
+ *.npy
140
+ *.pickle
141
+
142
+ # MacOS
143
+ *DS_Store*
144
+ # git
145
+ *.orig
.readthedocs.yaml ADDED
@@ -0,0 +1,13 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.9"
7
+
8
+ sphinx:
9
+ configuration: docs/en/source/conf.py
10
+
11
+ python:
12
+ install:
13
+ - requirements: requirements/docs.txt
app.py ADDED
@@ -0,0 +1,202 @@
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import time
5
+
6
+ os.makedirs("outputs", exist_ok=True)
7
+ sys.path.insert(0, '.')
8
+
9
+ import argparse
10
+ import os.path as osp
11
+ import mmcv
12
+ import numpy as np
13
+ import torch
14
+ from mmcv.runner import load_checkpoint
15
+ from mmcv.parallel import MMDataParallel
16
+ from scipy.ndimage import gaussian_filter
17
+ from IPython.display import Image
18
+
19
+ from mogen.models.utils.imagebind_wrapper import (
20
+ extract_text_feature,
21
+ extract_audio_feature,
22
+ imagebind_huge
23
+ )
24
+ from mogen.models import build_architecture
25
+
26
+ from mogen.utils.plot_utils import (
27
+ plot_3d_motion,
28
+ add_audio,
29
+ get_audio_length
30
+ )
31
+ from mogen.datasets.paramUtil import (
32
+ t2m_body_hand_kinematic_chain,
33
+ t2m_kinematic_chain
34
+ )
35
+ from mogen.datasets.utils import recover_from_ric
36
+ from mogen.datasets.pipelines import RetargetSkeleton
37
+
38
+
39
+ def motion_temporal_filter(motion, sigma=1):
40
+ motion = motion.reshape(motion.shape[0], -1)
41
+ for i in range(motion.shape[1]):
42
+ motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
43
+ return motion.reshape(motion.shape[0], -1, 3)
44
+
45
+ def plot_tomato(data, kinematic_chain, result_path, npy_path, fps, sigma=None):
46
+ joints = recover_from_ric(torch.from_numpy(data).float(), 52).numpy()
47
+ joints = motion_temporal_filter(joints, sigma=2.5)
48
+ joints = rtg_skl({"keypoints3d": joints, "meta_data": {"has_lhnd": True}})["keypoints3d"]
49
+ plot_3d_motion(
50
+ out_path=result_path,
51
+ joints=joints,
52
+ kinematic_chain=kinematic_chain,
53
+ title=None,
54
+ fps=fps)
55
+ if npy_path is not None:
56
+ np.save(npy_path, joints)
57
+
58
+ def create_lmm():
59
+ config_path = "configs/lmm/lmm_small_demo.py"
60
+ ckpt_path = "pretrained/lmm_small_demo.pth"
61
+ cfg = mmcv.Config.fromfile(config_path)
62
+ model = build_architecture(cfg.model)
63
+ load_checkpoint(model, ckpt_path, map_location='cpu')
64
+ if device == 'cpu':
65
+ model = model.cpu()
66
+ else:
67
+ model = MMDataParallel(model, device_ids=[0])
68
+ model.eval()
69
+ return model
70
+
71
+ # device = 'cpu'
72
+ device = 'cuda'
73
+ # os.environ["NO_PROXY"] = os.environ["no_proxy"] = "localhost, 127.0.0.1:7860"
74
+ model_lmm = create_lmm()
75
+ model_imagebind = imagebind_huge(pretrained=True)
76
+ model_imagebind.eval()
77
+ model_imagebind.to(device)
78
+ rtg_skl = RetargetSkeleton(tgt_skel_file='data/motionverse/statistics/skeleton.npy')
79
+
80
+ mean_path = "data/mean.npy"
81
+ std_path = "data/std.npy"
82
+ mean = np.load(mean_path)
83
+ std = np.load(std_path)
84
+
85
+ def show_generation_result(model, text, audio_path, motion_length, result_path):
86
+ fps = 20
87
+ if audio_path is not None:
88
+ motion_length = min(200, int(get_audio_length(audio_path) * fps) + 1)
89
+ motion = torch.zeros(1, motion_length, 669).to(device)
90
+ motion_mask = torch.ones(1, motion_length).to(device)
91
+ motion_mask[0, :motion_length] = 1
92
+ motion_mask = motion_mask.unsqueeze(-1).repeat(1, 1, 10)
93
+ motion_mask[:, :, 9] = 0
94
+ dataset_name = "humanml3d_t2m"
95
+ kinematic_chain = t2m_body_hand_kinematic_chain
96
+ rotation_type = "h3d_rot"
97
+ motion_metas = [{
98
+ 'meta_data': dict(framerate=fps, dataset_name=dataset_name, rotation_type=rotation_type)
99
+ }]
100
+ motion_length = torch.Tensor([motion_length]).long().to(device)
101
+ if text is None and audio_path is not None:
102
+ text = "A person is standing and speaking."
103
+
104
+ model = model.to(device)
105
+ input = {
106
+ 'motion': motion,
107
+ 'motion_mask': motion_mask,
108
+ 'motion_length': motion_length,
109
+ 'motion_metas': motion_metas,
110
+ 'num_intervals': 1
111
+ }
112
+ if text is not None:
113
+ text_word_feat, text_seq_feat = \
114
+ extract_text_feature([text], model_imagebind, device)
115
+ assert text_word_feat.shape[0] == 1
116
+ assert text_word_feat.shape[1] == 77
117
+ assert text_word_feat.shape[2] == 1024
118
+ assert text_seq_feat.shape[0] == 1
119
+ assert text_seq_feat.shape[1] == 1024
120
+ input['text_word_feat'] = text_word_feat
121
+ input['text_seq_feat'] = text_seq_feat
122
+ input['text_cond'] = torch.Tensor([1.0] * 1).to(device)
123
+ else:
124
+ input['text_word_feat'] = torch.zeros(1, 77, 1024).to(device)
125
+ input['text_seq_feat'] = torch.zeros(1, 1024).to(device)
126
+ input['text_cond'] = torch.Tensor([0] * 1).to(device)
127
+ if audio_path is not None:
128
+ speech_word_feat, speech_seq_feat = \
129
+ extract_audio_feature([audio_path], model_imagebind, device)
130
+ assert speech_word_feat.shape[0] == 1
131
+ assert speech_word_feat.shape[1] == 229
132
+ assert speech_word_feat.shape[2] == 768
133
+ assert speech_seq_feat.shape[0] == 1
134
+ assert speech_seq_feat.shape[1] == 1024
135
+ input['speech_word_feat'] = speech_word_feat
136
+ input['speech_seq_feat'] = speech_seq_feat
137
+ input['speech_cond'] = torch.Tensor([1.0] * 1).to(device)
138
+ else:
139
+ input['speech_word_feat'] = torch.zeros(1, 229, 768).to(device)
140
+ input['speech_seq_feat'] = torch.zeros(1, 1024).to(device)
141
+ input['speech_cond'] = torch.Tensor([0] * 1).to(device)
142
+
143
+ all_pred_motion = []
144
+ with torch.no_grad():
145
+ input['inference_kwargs'] = {}
146
+ output = model(**input)[0]['pred_motion'][:motion_length]
147
+ pred_motion = output.cpu().detach().numpy()
148
+ pred_motion = pred_motion * std + mean
149
+
150
+ plot_tomato(pred_motion, kinematic_chain, result_path, None, fps, 2)
151
+
152
+ if audio_path is not None:
153
+ add_audio(result_path, [audio_path])
154
+
155
+ def generate(prompt, audio_path, length):
156
+ if not os.path.exists("outputs"):
157
+ os.mkdir("outputs")
158
+ result_path = "outputs/" + str(int(time.time())) + ".mp4"
159
+ print(audio_path)
160
+ if audio_path is None or audio_path.endswith("placeholder.wav"):
161
+ audio_path = None
162
+ if len(prompt) == 0:
163
+ prompt = None
164
+ show_generation_result(model_lmm, prompt, audio_path, length, result_path)
165
+ return result_path
166
+
167
+ input_audio = gr.Audio(
168
+ type='filepath',
169
+ format='wav',
170
+ label="Audio (1-10 s, overrides motion length):",
171
+ show_label=True,
172
+ sources=["upload", "microphone"],
173
+ min_length=1,
174
+ max_length=10,
175
+ waveform_options=gr.WaveformOptions(
176
+ waveform_color="#01C6FF",
177
+ waveform_progress_color="#0066B4",
178
+ skip_length=2,
179
+ show_controls=False,
180
+ ),
181
+ )
182
+
183
+ input_text = gr.Textbox(
184
+ label="Text prompt:"
185
+ )
186
+
187
+ demo = gr.Interface(
188
+ fn=generate,
189
+ inputs=[input_text, input_audio, gr.Slider(20, 200, value=60, label="Motion length (fps 20):")],
190
+ outputs=gr.Video(label="Video:"),
191
+ examples=[
192
+ ["A person walks in a circle.", "examples/placeholder.m4a", 120],
193
+ ["A person jumps forward.", "examples/placeholder.m4a", 100],
194
+ ["A person is stretching arms.", "examples/placeholder.m4a", 80],
195
+ ["", "examples/surprise.m4a", 200],
196
+ ["", "examples/angry.m4a", 200],
197
+ ],
198
+ title="LMM: Large Motion Model for Unified Multi-Modal Motion Generation",
199
+ description="\nThis is an interactive demo for LMM. For more information, feel free to visit our project page (https://github.com/mingyuan-zhang/LMM).")
200
+
201
+ demo.queue()
202
+ demo.launch()
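For completeness, a minimal sketch of driving the same pipeline without the Gradio UI, assuming the module-level setup above (models, mean/std, retargeting) has already run; the prompt, motion length, and output path below are illustrative:

# Text-only generation through the same entry point the Gradio callback uses.
show_generation_result(model_lmm,
                       text="A person walks in a circle.",
                       audio_path=None,
                       motion_length=120,
                       result_path="outputs/example.mp4")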
configs/lmm/lmm.py ADDED
@@ -0,0 +1,75 @@
1
+ dataset_names = [
2
+ 'all',
3
+ 'amass_mocap', 'motionx_mocap', 'humanact12_mocap', 'uestc_mocap', 'ntu_mocap', 'aist_mocap',
4
+ 'beat_mocap', 'tedg_mocap', 'tedex_mocap', 's2g3d_mocap', 'h36m_mocap', 'mpi_mocap',
5
+
6
+ 'humanml3d_t2m', 'kitml_t2m', 'babel_t2m', 'motionx_t2m',
7
+ 'humanact12_t2m', 'uestc_t2m', 'ntu_t2m',
8
+
9
+ 'aist_m2d',
10
+ 'beat_s2g', 'tedg_s2g', 'tedex_s2g', 's2g3d_s2g',
11
+
12
+ 'h36m_v2m', 'mpi_v2m'
13
+ ]
14
+ num_datasets = len(dataset_names)
15
+ # model settings
16
+ model = dict(
17
+ type='UnifiedMotionDiffusion',
18
+ model=dict(
19
+ type='LargeMotionModel',
20
+ input_feats=669,
21
+ max_seq_len=200,
22
+ num_parts=10,
23
+ latent_part_dim=64,
24
+ time_embed_dim=2048,
25
+ dataset_names=dataset_names,
26
+ num_layers=4,
27
+ num_cond_layers=2,
28
+ num_datasets=num_datasets,
29
+ dropout=0,
30
+ ca_block_cfg=dict(
31
+ type='ArtAttention',
32
+ num_experts=16,
33
+ topk=4,
34
+ gate_type='cosine_top',
35
+ gate_noise=1.0,
36
+ num_datasets=num_datasets,
37
+ has_text=True,
38
+ has_music=True,
39
+ has_speech=True,
40
+ has_video=True
41
+ ),
42
+ text_input_dim=1024,
43
+ music_input_dim=768,
44
+ speech_input_dim=768,
45
+ video_input_dim=1024,
46
+ guidance_cfg=dict(
47
+ all=dict(type='linear', scale=5.5),
48
+ ),
49
+ moe_route_loss_weight=10.0,
50
+ template_kl_loss_weight=0.0001,
51
+ use_pos_embedding=False,
52
+ cond_drop_rate=0.1
53
+ ),
54
+ loss_recon=dict(
55
+ type='KinematicLoss', loss_type='mse', loss_weight=[20], reduction='none'),
56
+ train_repeat=1,
57
+ diffusion_train=dict(
58
+ beta_scheduler='linear',
59
+ diffusion_steps=1000,
60
+ model_mean_type='start_x',
61
+ model_var_type='fixed_large',
62
+ ),
63
+ diffusion_test_dict=dict(
64
+ base=dict(
65
+ beta_scheduler='linear',
66
+ diffusion_steps=1000,
67
+ model_mean_type='start_x',
68
+ model_var_type='fixed_large',
69
+ ),
70
+ all='15,15,8,6,6'
71
+ ),
72
+ inference_type='ddim',
73
+ loss_reduction='batch',
74
+ loss_weight='data/motionverse/statistics/loss_weight.npy'
75
+ )
configs/lmm/lmm_small_demo.py ADDED
@@ -0,0 +1,22 @@
1
+ _base_ = ['lmm.py']
2
+
3
+ model = dict(
4
+ model=dict(
5
+ latent_part_dim=64,
6
+ num_layers=8,
7
+ num_cond_layers=2,
8
+ dropout=0.1,
9
+ ca_block_cfg=dict(
10
+ num_experts=16,
11
+ topk=4
12
+ ),
13
+ guidance_cfg=dict(
14
+ humanml3d_t2m=dict(type='linear', scale=10.5),
15
+ ),
16
+ ),
17
+ diffusion_test_dict=dict(
18
+ humanml3d_t2m='15,15,8,6,6',
19
+ ),
20
+ )
21
+
22
+ data = dict(samples_per_gpu=32)
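As a sketch of how this config is consumed, mirroring create_lmm in app.py above (the checkpoint path is the one app.py expects under pretrained/):

import mmcv
from mmcv.runner import load_checkpoint
from mogen.models import build_architecture

# lmm_small_demo.py inherits the full model definition from lmm.py via _base_.
cfg = mmcv.Config.fromfile('configs/lmm/lmm_small_demo.py')
model = build_architecture(cfg.model)
load_checkpoint(model, 'pretrained/lmm_small_demo.pth', map_location='cpu')
model.eval()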
examples/angry.m4a ADDED
Binary file (108 kB).
examples/placeholder.m4a ADDED
Binary file (30 kB).
examples/surprise.m4a ADDED
Binary file (89.4 kB).
mogen/__init__.py ADDED
@@ -0,0 +1,56 @@
1
+ import warnings
2
+
3
+ import mmcv
4
+ from packaging.version import parse
5
+
6
+ from .version import __version__
7
+
8
+
9
+ def digit_version(version_str: str, length: int = 4):
10
+ """Convert a version string into a tuple of integers.
11
+ This method is usually used for comparing two versions. For pre-release
12
+ versions: alpha < beta < rc.
13
+ Args:
14
+ version_str (str): The version string.
15
+ length (int): The maximum number of version levels. Default: 4.
16
+ Returns:
17
+ tuple[int]: The version info in digits (integers).
18
+ """
19
+ version = parse(version_str)
20
+ assert version.release, f'failed to parse version {version_str}'
21
+ release = list(version.release)
22
+ release = release[:length]
23
+ if len(release) < length:
24
+ release = release + [0] * (length - len(release))
25
+ if version.is_prerelease:
26
+ mapping = {'a': -3, 'b': -2, 'rc': -1}
27
+ val = -4
28
+ # version.pre can be None
29
+ if version.pre:
30
+ if version.pre[0] not in mapping:
31
+ warnings.warn(f'unknown prerelease version {version.pre[0]}, '
32
+ 'version checking may go wrong')
33
+ else:
34
+ val = mapping[version.pre[0]]
35
+ release.extend([val, version.pre[-1]])
36
+ else:
37
+ release.extend([val, 0])
38
+
39
+ elif version.is_postrelease:
40
+ release.extend([1, version.post])
41
+ else:
42
+ release.extend([0, 0])
43
+ return tuple(release)
44
+
45
+
46
+ mmcv_minimum_version = '1.4.2'
47
+ mmcv_maximum_version = '1.9.0'
48
+ mmcv_version = digit_version(mmcv.__version__)
49
+
50
+
51
+ assert (mmcv_version >= digit_version(mmcv_minimum_version)
52
+ and mmcv_version <= digit_version(mmcv_maximum_version)), \
53
+ f'MMCV=={mmcv.__version__} is used but incompatible. ' \
54
+ f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
55
+
56
+ __all__ = ['__version__', 'digit_version']
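For reference, a minimal sketch of how digit_version orders version strings (assuming a compatible mmcv is installed, since importing mogen runs the version check above); the inputs are illustrative, not values used by the repository:

from mogen import digit_version

# Final releases pad the release digits to four and append (0, 0).
assert digit_version('1.7.0') == (1, 7, 0, 0, 0, 0)
# Pre-releases append a negative marker (a: -3, b: -2, rc: -1) plus the pre number,
# so they sort below the corresponding final release.
assert digit_version('1.7.0rc1') == (1, 7, 0, 0, -1, 1)
assert digit_version('1.7.0rc1') < digit_version('1.7.0')
# Post-releases append (1, post_number) and sort above the final release.
assert digit_version('1.7.0.post1') == (1, 7, 0, 0, 1, 1)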
mogen/apis/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ from mogen.apis.test import (collect_results_cpu, collect_results_gpu,
2
+ multi_gpu_test, single_gpu_test)
3
+ from mogen.apis.train import set_random_seed, train_model
4
+
5
+ __all__ = [
6
+ 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
7
+ 'single_gpu_test', 'set_random_seed', 'train_model'
8
+ ]
mogen/apis/test.py ADDED
@@ -0,0 +1,158 @@
1
+ import os.path as osp
2
+ import pickle
3
+ import shutil
4
+ import tempfile
5
+ import time
6
+
7
+ import mmcv
8
+ import torch
9
+ import torch.distributed as dist
10
+ from mmcv.runner import get_dist_info
11
+
12
+
13
+ def single_gpu_test(model, data_loader):
14
+ """Test with single gpu."""
15
+ model.eval()
16
+ results = []
17
+ dataset = data_loader.dataset
18
+ prog_bar = mmcv.ProgressBar(len(dataset))
19
+ for i, data in enumerate(data_loader):
20
+ with torch.no_grad():
21
+ result = model(return_loss=False, **data)
22
+
23
+ batch_size = len(result)
24
+ if isinstance(result, list):
25
+ results.extend(result)
26
+ else:
27
+ results.append(result)
28
+
29
+ batch_size = data['motion'].size(0)
30
+ for _ in range(batch_size):
31
+ prog_bar.update()
32
+ return results
33
+
34
+
35
+ def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
36
+ """Test model with multiple gpus.
37
+ This method tests model with multiple gpus and collects the results
38
+ under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
39
+ it encodes results to gpu tensors and use gpu communication for results
40
+ collection. On cpu mode it saves the results on different gpus to 'tmpdir'
41
+ and collects them by the rank 0 worker.
42
+ Args:
43
+ model (nn.Module): Model to be tested.
44
+ data_loader (nn.Dataloader): Pytorch data loader.
45
+ tmpdir (str): Path of directory to save the temporary results from
46
+ different gpus under cpu mode.
47
+ gpu_collect (bool): Option to use either gpu or cpu to collect results.
48
+ Returns:
49
+ list: The prediction results.
50
+ """
51
+ model.eval()
52
+ results = []
53
+ dataset = data_loader.dataset
54
+ rank, world_size = get_dist_info()
55
+ if rank == 0:
56
+ # Check if tmpdir is valid for cpu_collect
57
+ if (not gpu_collect) and (tmpdir is not None and osp.exists(tmpdir)):
58
+ raise OSError(f'The tmpdir {tmpdir} already exists. '
59
+ 'Since tmpdir will be deleted after testing, '
60
+ 'please make sure you specify an empty one.')
61
+ prog_bar = mmcv.ProgressBar(len(dataset))
62
+ time.sleep(2) # This line can prevent deadlock problem in some cases.
63
+ for i, data in enumerate(data_loader):
64
+ with torch.no_grad():
65
+ result = model(return_loss=False, **data)
66
+ if isinstance(result, list):
67
+ results.extend(result)
68
+ else:
69
+ results.append(result)
70
+
71
+ if rank == 0:
72
+ batch_size = data['motion'].size(0)
73
+ for _ in range(batch_size * world_size):
74
+ prog_bar.update()
75
+
76
+ # collect results from all ranks
77
+ if gpu_collect:
78
+ results = collect_results_gpu(results, len(dataset))
79
+ else:
80
+ results = collect_results_cpu(results, len(dataset), tmpdir)
81
+ return results
82
+
83
+
84
+ def collect_results_cpu(result_part, size, tmpdir=None):
85
+ """Collect results in cpu."""
86
+ rank, world_size = get_dist_info()
87
+ # create a tmp dir if it is not specified
88
+ if tmpdir is None:
89
+ MAX_LEN = 512
90
+ # 32 is whitespace
91
+ dir_tensor = torch.full((MAX_LEN, ),
92
+ 32,
93
+ dtype=torch.uint8,
94
+ device='cuda')
95
+ if rank == 0:
96
+ mmcv.mkdir_or_exist('.dist_test')
97
+ tmpdir = tempfile.mkdtemp(dir='.dist_test')
98
+ tmpdir = torch.tensor(bytearray(tmpdir.encode()),
99
+ dtype=torch.uint8,
100
+ device='cuda')
101
+ dir_tensor[:len(tmpdir)] = tmpdir
102
+ dist.broadcast(dir_tensor, 0)
103
+ tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
104
+ else:
105
+ mmcv.mkdir_or_exist(tmpdir)
106
+ # dump the part result to the dir
107
+ mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
108
+ dist.barrier()
109
+ # collect all parts
110
+ if rank != 0:
111
+ return None
112
+ else:
113
+ # load results of all parts from tmp dir
114
+ part_list = []
115
+ for i in range(world_size):
116
+ part_file = osp.join(tmpdir, f'part_{i}.pkl')
117
+ part_result = mmcv.load(part_file)
118
+ part_list.append(part_result)
119
+ # sort the results
120
+ ordered_results = []
121
+ for res in zip(*part_list):
122
+ ordered_results.extend(list(res))
123
+ # the dataloader may pad some samples
124
+ ordered_results = ordered_results[:size]
125
+ # remove tmp dir
126
+ shutil.rmtree(tmpdir)
127
+ return ordered_results
128
+
129
+
130
+ def collect_results_gpu(result_part, size):
131
+ """Collect results in gpu."""
132
+ rank, world_size = get_dist_info()
133
+ # dump result part to tensor with pickle
134
+ part_tensor = torch.tensor(bytearray(pickle.dumps(result_part)),
135
+ dtype=torch.uint8,
136
+ device='cuda')
137
+ # gather all result part tensor shape
138
+ shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
139
+ shape_list = [shape_tensor.clone() for _ in range(world_size)]
140
+ dist.all_gather(shape_list, shape_tensor)
141
+ # padding result part tensor to max length
142
+ shape_max = torch.tensor(shape_list).max()
143
+ part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
144
+ part_send[:shape_tensor[0]] = part_tensor
145
+ part_recv_list = [
146
+ part_tensor.new_zeros(shape_max) for _ in range(world_size)
147
+ ]
148
+ # gather all result part
149
+ dist.all_gather(part_recv_list, part_send)
150
+
151
+ if rank == 0:
152
+ ordered_results = []
153
+ for recv, shape in zip(part_recv_list, shape_list):
154
+ part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
155
+ ordered_results.extend(part_result)
156
+ # the dataloader may pad some samples
157
+ ordered_results = ordered_results[:size]
158
+ return ordered_results
mogen/apis/train.py ADDED
@@ -0,0 +1,161 @@
1
+ import random
2
+ import warnings
3
+
4
+ import numpy as np
5
+ import torch
6
+ from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
7
+ from mmcv.runner import (
8
+ DistSamplerSeedHook,
9
+ Fp16OptimizerHook,
10
+ OptimizerHook,
11
+ GradientCumulativeFp16OptimizerHook,
12
+ GradientCumulativeOptimizerHook,
13
+ build_runner)
14
+
15
+ from mogen.core.distributed_wrapper import DistributedDataParallelWrapper
16
+ from mogen.core.evaluation import DistEvalHook, EvalHook
17
+ from mogen.core.optimizer import build_optimizers
18
+ from mogen.datasets import build_dataloader, build_dataset
19
+ from mogen.utils import get_root_logger
20
+
21
+
22
+ def set_random_seed(seed, deterministic=False):
23
+ """Set random seed.
24
+ Args:
25
+ seed (int): Seed to be used.
26
+ deterministic (bool): Whether to set the deterministic option for
27
+ CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
28
+ to True and `torch.backends.cudnn.benchmark` to False.
29
+ Default: False.
30
+ """
31
+ random.seed(seed)
32
+ np.random.seed(seed)
33
+ torch.manual_seed(seed)
34
+ torch.cuda.manual_seed_all(seed)
35
+ if deterministic:
36
+ torch.backends.cudnn.deterministic = True
37
+ torch.backends.cudnn.benchmark = False
38
+
39
+
40
+ def train_model(model,
41
+ dataset,
42
+ cfg,
43
+ distributed=False,
44
+ validate=False,
45
+ timestamp=None,
46
+ device='cuda',
47
+ meta=None):
48
+ """Main api for training model."""
49
+ logger = get_root_logger(cfg.log_level)
50
+
51
+ # prepare data loaders
52
+ dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
53
+
54
+ data_loaders = [
55
+ build_dataloader(
56
+ ds,
57
+ cfg.data.samples_per_gpu,
58
+ cfg.data.workers_per_gpu,
59
+ # cfg.gpus will be ignored if distributed
60
+ num_gpus=len(cfg.gpu_ids),
61
+ dist=distributed,
62
+ round_up=True,
63
+ sampler_cfg=cfg.data.sampler_cfg,
64
+ batch_sampler_cfg=cfg.data.batch_sampler_cfg,
65
+ seed=cfg.seed) for ds in dataset
66
+ ]
67
+
68
+ # determine whether to use adversarial training or not
69
+ use_adversarial_train = cfg.get('use_adversarial_train', False)
70
+
71
+ # put model on gpus
72
+ if distributed:
73
+ find_unused_parameters = cfg.get('find_unused_parameters', True)
74
+ # Sets the `find_unused_parameters` parameter in
75
+ # torch.nn.parallel.DistributedDataParallel
76
+ if use_adversarial_train:
77
+ # Use DistributedDataParallelWrapper for adversarial training
78
+ model = DistributedDataParallelWrapper(
79
+ model,
80
+ device_ids=[torch.cuda.current_device()],
81
+ broadcast_buffers=False,
82
+ find_unused_parameters=find_unused_parameters)
83
+ else:
84
+ model = MMDistributedDataParallel(
85
+ model.cuda(),
86
+ device_ids=[torch.cuda.current_device()],
87
+ broadcast_buffers=False,
88
+ find_unused_parameters=find_unused_parameters)
89
+ else:
90
+ if device == 'cuda':
91
+ model = MMDataParallel(model.cuda(cfg.gpu_ids[0]),
92
+ device_ids=cfg.gpu_ids)
93
+ elif device == 'cpu':
94
+ model = model.cpu()
95
+ else:
96
+ raise ValueError(F'unsupported device name {device}.')
97
+
98
+ # build runner
99
+ optimizer = build_optimizers(model, cfg.optimizer)
100
+
101
+ if cfg.get('runner') is None:
102
+ cfg.runner = {
103
+ 'type': 'EpochBasedRunner',
104
+ 'max_epochs': cfg.total_epochs
105
+ }
106
+ warnings.warn(
107
+ 'config is now expected to have a `runner` section, '
108
+ 'please set `runner` in your config.', UserWarning)
109
+
110
+ runner = build_runner(cfg.runner,
111
+ default_args=dict(model=model,
112
+ batch_processor=None,
113
+ optimizer=optimizer,
114
+ work_dir=cfg.work_dir,
115
+ logger=logger,
116
+ meta=meta))
117
+
118
+ # an ugly workaround to make the .log and .log.json filenames the same
119
+ runner.timestamp = timestamp
120
+
121
+ if use_adversarial_train:
122
+ # The optimizer step process is included in the train_step function
123
+ # of the model, so the runner should NOT include optimizer hook.
124
+ optimizer_config = None
125
+ else:
126
+ if distributed and 'type' not in cfg.optimizer_config:
127
+ optimizer_config = OptimizerHook(**cfg.optimizer_config)
128
+ else:
129
+ optimizer_config = cfg.optimizer_config
130
+
131
+ # register hooks
132
+ runner.register_training_hooks(cfg.lr_config,
133
+ optimizer_config,
134
+ cfg.checkpoint_config,
135
+ cfg.log_config,
136
+ cfg.get('momentum_config', None),
137
+ custom_hooks_config=cfg.get(
138
+ 'custom_hooks', None))
139
+ if distributed:
140
+ runner.register_hook(DistSamplerSeedHook())
141
+
142
+ # register eval hooks
143
+ if validate:
144
+ val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
145
+ val_dataloader = build_dataloader(
146
+ val_dataset,
147
+ samples_per_gpu=cfg.data.samples_per_gpu,
148
+ workers_per_gpu=cfg.data.workers_per_gpu,
149
+ dist=distributed,
150
+ shuffle=False,
151
+ round_up=True)
152
+ eval_cfg = cfg.get('evaluation', {})
153
+ eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
154
+ eval_hook = DistEvalHook if distributed else EvalHook
155
+ runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
156
+
157
+ if cfg.resume_from:
158
+ runner.resume(cfg.resume_from)
159
+ elif cfg.load_from:
160
+ runner.load_checkpoint(cfg.load_from)
161
+ runner.run(data_loaders, cfg.workflow)
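A compact sketch of the call pattern train_model expects, in the spirit of a tools/train.py entry script; the config path is illustrative, and the config would additionally need the data, optimizer, runner, and logging sections that the function reads from cfg:

import mmcv
from mogen.apis import set_random_seed, train_model
from mogen.datasets import build_dataset
from mogen.models import build_architecture

# Hypothetical config: must define data.*, optimizer, optimizer_config, lr_config,
# checkpoint_config, log_config, work_dir, gpu_ids, seed, workflow, etc.
cfg = mmcv.Config.fromfile('configs/lmm/lmm_small_demo.py')
set_random_seed(0, deterministic=True)
model = build_architecture(cfg.model)
datasets = [build_dataset(cfg.data.train)]
train_model(model, datasets, cfg, distributed=False, validate=False)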
mogen/core/__init__.py ADDED
File without changes
mogen/core/distributed_wrapper.py ADDED
@@ -0,0 +1,135 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ import torch.nn as nn
4
+ from mmcv.parallel import MODULE_WRAPPERS, MMDistributedDataParallel
5
+ from mmcv.parallel.scatter_gather import scatter_kwargs
6
+ from torch.cuda._utils import _get_device_index
7
+
8
+
9
+ @MODULE_WRAPPERS.register_module()
10
+ class DistributedDataParallelWrapper(nn.Module):
11
+ """A DistributedDataParallel wrapper for models in the 3D mesh estimation task.
12
+
13
+ In some pipelines, there is a need to wrap different modules in
14
+ the models with separate DistributedDataParallel. Otherwise, it will cause
15
+ errors for GAN training.
16
+ More specifically, a GAN model usually has two sub-modules:
17
+ generator and discriminator. If we wrap both of them in one
18
+ standard DistributedDataParallel, it will cause errors during training,
19
+ because when we update the parameters of the generator (or discriminator),
20
+ the parameters of the discriminator (or generator) are not updated, which is
21
+ not allowed for DistributedDataParallel.
22
+ So we design this wrapper to separately wrap DistributedDataParallel
23
+ for generator and discriminator.
24
+ In this wrapper, we perform two operations:
25
+ 1. Wrap the modules in the models with separate MMDistributedDataParallel.
26
+ Note that only modules with parameters will be wrapped.
27
+ 2. Do scatter operation for 'forward', 'train_step' and 'val_step'.
28
+ Note that the arguments of this wrapper are the same as those in
29
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
30
+ Args:
31
+ module (nn.Module): Module that needs to be wrapped.
32
+ device_ids (list[int | `torch.device`]): Same as that in
33
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
34
+ dim (int, optional): Same as that in the official scatter function in
35
+ pytorch. Defaults to 0.
36
+ broadcast_buffers (bool): Same as that in
37
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
38
+ Defaults to False.
39
+ find_unused_parameters (bool, optional): Same as that in
40
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
41
+ Traverse the autograd graph of all tensors contained in returned
42
+ value of the wrapped module’s forward function. Defaults to False.
43
+ kwargs (dict): Other arguments used in
44
+ `torch.nn.parallel.distributed.DistributedDataParallel`.
45
+ """
46
+
47
+ def __init__(self,
48
+ module,
49
+ device_ids,
50
+ dim=0,
51
+ broadcast_buffers=False,
52
+ find_unused_parameters=False,
53
+ **kwargs):
54
+ super().__init__()
55
+ assert len(device_ids) == 1, (
56
+ 'Currently, DistributedDataParallelWrapper only supports a '
57
+ 'single CUDA device for each process. '
58
+ f'The length of device_ids must be 1, but got {len(device_ids)}.')
59
+ self.module = module
60
+ self.dim = dim
61
+ self.to_ddp(device_ids=device_ids,
62
+ dim=dim,
63
+ broadcast_buffers=broadcast_buffers,
64
+ find_unused_parameters=find_unused_parameters,
65
+ **kwargs)
66
+ self.output_device = _get_device_index(device_ids[0], True)
67
+
68
+ def to_ddp(self, device_ids, dim, broadcast_buffers,
69
+ find_unused_parameters, **kwargs):
70
+ """Wrap models with separate MMDistributedDataParallel.
71
+
72
+ It only wraps the modules with parameters.
73
+ """
74
+ for name, module in self.module._modules.items():
75
+ if next(module.parameters(), None) is None:
76
+ module = module.cuda()
77
+ elif all(not p.requires_grad for p in module.parameters()):
78
+ module = module.cuda()
79
+ else:
80
+ module = MMDistributedDataParallel(
81
+ module.cuda(),
82
+ device_ids=device_ids,
83
+ dim=dim,
84
+ broadcast_buffers=broadcast_buffers,
85
+ find_unused_parameters=find_unused_parameters,
86
+ **kwargs)
87
+ self.module._modules[name] = module
88
+
89
+ def scatter(self, inputs, kwargs, device_ids):
90
+ """Scatter function.
91
+
92
+ Args:
93
+ inputs (Tensor): Input Tensor.
94
+ kwargs (dict): Args for
95
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
96
+ device_ids (int): Device id.
97
+ """
98
+ return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
99
+
100
+ def forward(self, *inputs, **kwargs):
101
+ """Forward function.
102
+
103
+ Args:
104
+ inputs (tuple): Input data.
105
+ kwargs (dict): Args for
106
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
107
+ """
108
+ inputs, kwargs = self.scatter(inputs, kwargs,
109
+ [torch.cuda.current_device()])
110
+ return self.module(*inputs[0], **kwargs[0])
111
+
112
+ def train_step(self, *inputs, **kwargs):
113
+ """Train step function.
114
+
115
+ Args:
116
+ inputs (Tensor): Input Tensor.
117
+ kwargs (dict): Args for
118
+ ``mmcv.parallel.scatter_gather.scatter_kwargs``.
119
+ """
120
+ inputs, kwargs = self.scatter(inputs, kwargs,
121
+ [torch.cuda.current_device()])
122
+ output = self.module.train_step(*inputs[0], **kwargs[0])
123
+ return output
124
+
125
+ def val_step(self, *inputs, **kwargs):
126
+ """Validation step function.
127
+
128
+ Args:
129
+ inputs (tuple): Input data.
130
+ kwargs (dict): Args for ``scatter_kwargs``.
131
+ """
132
+ inputs, kwargs = self.scatter(inputs, kwargs,
133
+ [torch.cuda.current_device()])
134
+ output = self.module.val_step(*inputs[0], **kwargs[0])
135
+ return output
mogen/core/optimizer/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .builder import OPTIMIZERS, build_optimizers
2
+
3
+ __all__ = ['build_optimizers', 'OPTIMIZERS']
mogen/core/optimizer/builder.py ADDED
@@ -0,0 +1,52 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from mmcv.runner import build_optimizer
3
+ from mmcv.utils import Registry
4
+
5
+ OPTIMIZERS = Registry('optimizers')
6
+
7
+
8
+ def build_optimizers(model, cfgs):
9
+ """Build multiple optimizers from configs. If `cfgs` contains several dicts
10
+ for optimizers, then a dict for each constructed optimizers will be
11
+ returned. If `cfgs` only contains one optimizer config, the constructed
12
+ optimizer itself will be returned. For example,
13
+
14
+ 1) Multiple optimizer configs:
15
+
16
+ .. code-block:: python
17
+
18
+ optimizer_cfg = dict(
19
+ model1=dict(type='SGD', lr=lr),
20
+ model2=dict(type='SGD', lr=lr))
21
+
22
+ The return dict is
23
+ ``dict('model1': torch.optim.Optimizer, 'model2': torch.optim.Optimizer)``
24
+
25
+ 2) Single optimizer config:
26
+
27
+ .. code-block:: python
28
+
29
+ optimizer_cfg = dict(type='SGD', lr=lr)
30
+
31
+ The return is ``torch.optim.Optimizer``.
32
+
33
+ Args:
34
+ model (:obj:`nn.Module`): The model with parameters to be optimized.
35
+ cfgs (dict): The config dict of the optimizer.
36
+
37
+ Returns:
38
+ dict[:obj:`torch.optim.Optimizer`] | :obj:`torch.optim.Optimizer`:
39
+ The initialized optimizers.
40
+ """
41
+ optimizers = {}
42
+ if hasattr(model, 'module'):
43
+ model = model.module
44
+ # determine whether 'cfgs' has several dicts for optimizers
45
+ if all(isinstance(v, dict) for v in cfgs.values()):
46
+ for key, cfg in cfgs.items():
47
+ cfg_ = cfg.copy()
48
+ module = getattr(model, key)
49
+ optimizers[key] = build_optimizer(module, cfg_)
50
+ return optimizers
51
+
52
+ return build_optimizer(model, cfgs)
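A small sketch of the two branches documented above; the toy GAN modules and learning rates are illustrative:

import torch.nn as nn
from mogen.core.optimizer import build_optimizers

class ToyGAN(nn.Module):
    def __init__(self):
        super().__init__()
        self.generator = nn.Linear(8, 8)
        self.discriminator = nn.Linear(8, 1)

model = ToyGAN()
# Several optimizer configs -> a dict of optimizers keyed by submodule name.
optimizers = build_optimizers(model, dict(
    generator=dict(type='Adam', lr=1e-4),
    discriminator=dict(type='Adam', lr=4e-4)))
# A single optimizer config -> one optimizer over the whole model.
optimizer = build_optimizers(model, dict(type='Adam', lr=1e-4))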
mogen/datasets/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ from .base_dataset import BaseMotionDataset
2
+ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
3
+ from .pipelines import Compose
4
+ from .samplers import DistributedSampler
5
+ from .text_motion_dataset import TextMotionDataset
6
+ from .motionverse_dataset import MotionVerse
7
+
8
+ __all__ = [
9
+ 'BaseMotionDataset', 'TextMotionDataset', 'DATASETS', 'PIPELINES',
10
+ 'build_dataloader', 'build_dataset', 'Compose', 'DistributedSampler',
11
+ 'MotionVerse'
12
+ ]
mogen/datasets/base_dataset.py ADDED
@@ -0,0 +1,183 @@
1
+ import copy
2
+ import os
3
+ import json
4
+ from abc import abstractmethod
5
+ from typing import Optional, Union, List, Dict
6
+
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import Dataset
10
+
11
+ # from mogen.core.evaluation import build_evaluator
12
+ from mogen.models.builder import build_submodule
13
+ from .builder import DATASETS
14
+ from .pipelines import Compose
15
+
16
+
17
+ @DATASETS.register_module()
18
+ class BaseMotionDataset(Dataset):
19
+ """
20
+ Base class for motion datasets.
21
+
22
+ Args:
23
+ data_prefix (str): The prefix of the data path.
24
+ pipeline (list): A list of dicts, where each element represents an operation
25
+ defined in `mogen.datasets.pipelines`.
26
+ dataset_name (Optional[Union[str, None]]): The name of the dataset. Used to
27
+ identify the type of evaluation metric.
28
+ fixed_length (Optional[Union[int, None]]): The fixed length of the dataset for
29
+ iteration. If None, the dataset length is based on the number
30
+ of annotations.
31
+ ann_file (Optional[Union[str, None]]): The annotation file. If it is a string,
32
+ it is expected to be read from the file. If None, it will be
33
+ read from `data_prefix`.
34
+ motion_dir (Optional[Union[str, None]]): The directory containing motion data.
35
+ eval_cfg (Optional[Union[dict, None]]): Configuration for evaluation metrics.
36
+ test_mode (Optional[bool]): Whether the dataset is in test mode. Default is False.
37
+
38
+ Attributes:
39
+ data_infos (list): Loaded dataset annotations.
40
+ evaluators (list): List of evaluation objects.
41
+ eval_indexes (np.ndarray): Array of indices used for evaluation.
42
+ evaluator_model (torch.nn.Module): Model used for evaluation.
43
+ pipeline (Compose): Data processing pipeline.
44
+ """
45
+
46
+ def __init__(self,
47
+ data_prefix: str,
48
+ pipeline: List[Dict],
49
+ dataset_name: Optional[Union[str, None]] = None,
50
+ fixed_length: Optional[Union[int, None]] = None,
51
+ ann_file: Optional[Union[str, None]] = None,
52
+ motion_dir: Optional[Union[str, None]] = None,
53
+ eval_cfg: Optional[Union[dict, None]] = None,
54
+ test_mode: Optional[bool] = False):
55
+ super(BaseMotionDataset, self).__init__()
56
+
57
+ self.data_prefix = data_prefix
58
+ self.pipeline = Compose(pipeline)
59
+ self.dataset_name = dataset_name
60
+ self.fixed_length = fixed_length
61
+ self.ann_file = os.path.join(data_prefix, 'datasets', dataset_name, ann_file)
62
+ self.motion_dir = os.path.join(data_prefix, 'datasets', dataset_name, motion_dir)
63
+ self.eval_cfg = copy.deepcopy(eval_cfg)
64
+ self.test_mode = test_mode
65
+
66
+ self.load_annotations()
67
+ if self.test_mode:
68
+ self.prepare_evaluation()
69
+
70
+ @abstractmethod
71
+ def load_anno(self, index: int, name: str) -> dict:
72
+ """
73
+ Abstract method to load a single annotation.
74
+
75
+ Args:
76
+ index (int): Index of the annotation within `ann_file`.
+ name (str): Name or identifier of the annotation to load.
77
+
78
+ Returns:
79
+ dict: Loaded annotation as a dictionary.
80
+ """
81
+ pass
82
+
83
+ def load_annotations(self):
84
+ """Load annotations from `ann_file` to `data_infos`."""
85
+ self.data_infos = []
86
+ idx = 0
87
+ for line in open(self.ann_file, 'r').readlines():
88
+ line = line.strip()
89
+ self.data_infos.append(self.load_anno(idx, line))
90
+ idx += 1
91
+
92
+ def prepare_data(self, idx: int) -> dict:
93
+ """
94
+ Prepare raw data for the given index.
95
+
96
+ Args:
97
+ idx (int): Index of the data to prepare.
98
+
99
+ Returns:
100
+ dict: Processed data for the given index.
101
+ """
102
+ results = copy.deepcopy(self.data_infos[idx])
103
+ results['dataset_name'] = self.dataset_name
104
+ results['sample_idx'] = idx
105
+ return self.pipeline(results)
106
+
107
+ def __len__(self) -> int:
108
+ """Return the length of the current dataset.
109
+
110
+ Returns:
111
+ int: Length of the dataset.
112
+ """
113
+ if self.test_mode:
114
+ return len(self.eval_indexes)
115
+ elif self.fixed_length is not None:
116
+ return self.fixed_length
117
+ return len(self.data_infos)
118
+
119
+ def __getitem__(self, idx: int) -> dict:
120
+ """
121
+ Prepare data for the given index.
122
+
123
+ Args:
124
+ idx (int): Index of the data.
125
+
126
+ Returns:
127
+ dict: Data for the specified index.
128
+ """
129
+ if self.test_mode:
130
+ idx = self.eval_indexes[idx]
131
+ elif self.fixed_length is not None:
132
+ idx = idx % len(self.data_infos)
133
+ elif getattr(self, 'balanced_sampling', False):
134
+ cid = np.random.randint(0, len(self.category_list))
135
+ idx = np.random.randint(0, len(self.category_list[cid]))
136
+ idx = self.category_list[cid][idx]
137
+ return self.prepare_data(idx)
138
+
139
+ def prepare_evaluation(self):
140
+ """Prepare evaluation settings, including evaluators and evaluation indices."""
141
+ self.evaluators = []
142
+ self.eval_indexes = []
143
+ self.evaluator_model = build_submodule(self.eval_cfg.get('evaluator_model', None))
144
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
145
+ self.evaluator_model = self.evaluator_model.to(device)
146
+ self.evaluator_model.eval()
147
+ self.eval_cfg['evaluator_model'] = self.evaluator_model
148
+
149
+ for _ in range(self.eval_cfg['replication_times']):
150
+ eval_indexes = np.arange(len(self.data_infos))
151
+ if self.eval_cfg.get('shuffle_indexes', False):
152
+ np.random.shuffle(eval_indexes)
153
+ self.eval_indexes.append(eval_indexes)
154
+
155
+ for metric in self.eval_cfg['metrics']:
156
+ evaluator, self.eval_indexes = build_evaluator(
157
+ metric, self.eval_cfg, len(self.data_infos), self.eval_indexes)
158
+ self.evaluators.append(evaluator)
159
+
160
+ self.eval_indexes = np.concatenate(self.eval_indexes)
161
+
162
+ def evaluate(self, results: List[dict], work_dir: str, logger=None) -> dict:
163
+ """
164
+ Evaluate the model performance based on the results.
165
+
166
+ Args:
167
+ results (list): A list of result dictionaries.
168
+ work_dir (str): Directory where evaluation logs will be stored.
169
+ logger: Logger object to record evaluation results (optional).
170
+
171
+ Returns:
172
+ dict: Dictionary containing evaluation metrics.
173
+ """
174
+ metrics = {}
175
+ for evaluator in self.evaluators:
176
+ metrics.update(evaluator.evaluate(results))
177
+ if logger is not None:
178
+ logger.info(metrics)
179
+ eval_output = os.path.join(work_dir, 'eval_results.log')
180
+ with open(eval_output, 'w') as f:
181
+ for k, v in metrics.items():
182
+ f.write(k + ': ' + str(v) + '\n')
183
+ return metrics
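A minimal sketch of what a concrete subclass supplies, given the abstract load_anno hook and the (index, line) call made by load_annotations; the class name and annotation layout are illustrative, not part of the repository:

import os
from mogen.datasets import DATASETS
from mogen.datasets.base_dataset import BaseMotionDataset

@DATASETS.register_module()
class ToyMotionDataset(BaseMotionDataset):
    """Hypothetical dataset: each line of ann_file is a motion file basename."""

    def load_anno(self, index, name):
        # Return one annotation dict per line of the annotation file.
        return {'motion_path': os.path.join(self.motion_dir, name + '.npy')}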
mogen/datasets/builder.py ADDED
@@ -0,0 +1,149 @@
1
+ import platform
2
+ import random
3
+ from functools import partial
4
+ from typing import Optional, Union
5
+
6
+ import numpy as np
7
+ from mmcv.parallel import collate
8
+ from mmcv.runner import get_dist_info
9
+ from mmcv.utils import Registry, build_from_cfg
10
+ from torch.utils.data import DataLoader
11
+ from torch.utils.data.dataset import Dataset
12
+
13
+ from .samplers import (
14
+ DistributedSampler,
15
+ DistributedWeightedRandomSampler,
16
+ MonoTaskBatchSampler
17
+ )
18
+
19
+ if platform.system() != 'Windows':
20
+ # https://github.com/pytorch/pytorch/issues/973
21
+ import resource
22
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
23
+ base_soft_limit = rlimit[0]
24
+ hard_limit = rlimit[1]
25
+ soft_limit = min(max(4096, base_soft_limit), hard_limit)
26
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
27
+
28
+ DATASETS = Registry('dataset')
29
+ PIPELINES = Registry('pipeline')
30
+
31
+
32
+ def build_dataset(cfg: Union[dict, list, tuple],
33
+ default_args: Optional[Union[dict, None]] = None):
34
+ """Build a dataset from the given config."""
35
+ from .dataset_wrappers import ConcatDataset, RepeatDataset
36
+ if isinstance(cfg, (list, tuple)):
37
+ dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
38
+ elif cfg['type'] == 'RepeatDataset':
39
+ dataset = RepeatDataset(build_dataset(cfg['dataset'], default_args),
40
+ cfg['times'])
41
+ else:
42
+ dataset = build_from_cfg(cfg, DATASETS, default_args)
43
+
44
+ return dataset
45
+
46
+
47
+ def build_dataloader(dataset: Dataset,
48
+ samples_per_gpu: int,
49
+ workers_per_gpu: int,
50
+ num_gpus: Optional[int] = 1,
51
+ dist: Optional[bool] = True,
52
+ shuffle: Optional[bool] = True,
53
+ round_up: Optional[bool] = True,
54
+ seed: Optional[Union[int, None]] = None,
55
+ sampler_cfg: Optional[dict] = None,
56
+ batch_sampler_cfg: Optional[dict] = None,
57
+ persistent_workers: Optional[bool] = True,
58
+ **kwargs):
59
+ """Build PyTorch DataLoader.
60
+ In distributed training, each GPU/process has a dataloader.
61
+ In non-distributed training, there is only one dataloader for all GPUs.
62
+ Args:
63
+ dataset (:obj:`Dataset`): A PyTorch dataset.
64
+ samples_per_gpu (int): Number of training samples on each GPU, i.e.,
65
+ batch size of each GPU.
66
+ workers_per_gpu (int): How many subprocesses to use for data loading
67
+ for each GPU.
68
+ num_gpus (int, optional): Number of GPUs. Only used in non-distributed
69
+ training.
70
+ dist (bool, optional): Distributed training/test or not. Default: True.
71
+ shuffle (bool, optional): Whether to shuffle the data at every epoch.
72
+ Default: True.
73
+ round_up (bool, optional): Whether to round up the length of dataset by
74
+ adding extra samples to make it evenly divisible. Default: True.
75
+ kwargs: any keyword argument to be used to initialize DataLoader
76
+ Returns:
77
+ DataLoader: A PyTorch dataloader.
78
+ """
79
+ rank, world_size = get_dist_info()
80
+ if dist:
81
+ weighted_sample = False
82
+ if sampler_cfg is not None:
83
+ weighted_sample = sampler_cfg.get('weighted_sample', False)
84
+ if weighted_sample:
85
+ sampler_cls = DistributedWeightedRandomSampler
86
+ else:
87
+ sampler_cls = DistributedSampler
88
+ sampler = sampler_cls(
89
+ dataset,
90
+ world_size,
91
+ rank,
92
+ shuffle=shuffle,
93
+ round_up=round_up
94
+ )
95
+ shuffle = False
96
+ batch_size = samples_per_gpu
97
+ num_workers = workers_per_gpu
98
+ else:
99
+ sampler = None
100
+ batch_size = num_gpus * samples_per_gpu
101
+ num_workers = num_gpus * workers_per_gpu
102
+
103
+ init_fn = partial(
104
+ worker_init_fn, num_workers=num_workers, rank=rank,
105
+ seed=seed) if seed is not None else None
106
+
107
+ if batch_sampler_cfg is not None:
108
+ type_name = batch_sampler_cfg['type']
109
+ assert type_name == 'MonoTaskBatchSampler'
110
+ batch_sampler = MonoTaskBatchSampler(
111
+ sampler=sampler,
112
+ batch_size=batch_size,
113
+ num_tasks=batch_sampler_cfg['num_tasks']
114
+ )
115
+ data_loader = DataLoader(
116
+ dataset,
117
+ batch_sampler=batch_sampler,
118
+ num_workers=num_workers,
119
+ collate_fn=partial(
120
+ collate, samples_per_gpu=samples_per_gpu),
121
+ pin_memory=False,
122
+ shuffle=shuffle,
123
+ worker_init_fn=init_fn,
124
+ persistent_workers=persistent_workers,
125
+ **kwargs)
126
+ else:
127
+ data_loader = DataLoader(
128
+ dataset,
129
+ batch_size=batch_size,
130
+ sampler=sampler,
131
+ num_workers=num_workers,
132
+ collate_fn=partial(
133
+ collate, samples_per_gpu=samples_per_gpu),
134
+ pin_memory=False,
135
+ shuffle=shuffle,
136
+ worker_init_fn=init_fn,
137
+ persistent_workers=persistent_workers,
138
+ **kwargs)
139
+
140
+ return data_loader
141
+
142
+
143
+ def worker_init_fn(worker_id: int, num_workers: int, rank: int, seed: int):
144
+ """Init random seed for each worker."""
145
+ # The seed of each worker equals to
146
+ # num_worker * rank + worker_id + user_seed
147
+ worker_seed = num_workers * rank + worker_id + seed
148
+ np.random.seed(worker_seed)
149
+ random.seed(worker_seed)
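A tiny illustration of the per-worker seeding formula in worker_init_fn (the values are made up): with 4 workers on rank 1 and user seed 42, the workers receive distinct, reproducible seeds.

num_workers, rank, seed = 4, 1, 42
worker_seeds = [num_workers * rank + worker_id + seed for worker_id in range(num_workers)]
print(worker_seeds)  # [46, 47, 48, 49]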
mogen/datasets/dataset_wrappers.py ADDED
@@ -0,0 +1,42 @@
1
+ from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
2
+ from torch.utils.data.dataset import Dataset
3
+
4
+ from .builder import DATASETS
5
+
6
+
7
+ @DATASETS.register_module()
8
+ class ConcatDataset(_ConcatDataset):
9
+ """A wrapper of concatenated dataset.
10
+ Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
11
+ add `get_cat_ids` function.
12
+ Args:
13
+ datasets (list[:obj:`Dataset`]): A list of datasets.
14
+ """
15
+
16
+ def __init__(self, datasets: list):
17
+ super(ConcatDataset, self).__init__(datasets)
18
+
19
+
20
+ @DATASETS.register_module()
21
+ class RepeatDataset(object):
22
+ """A wrapper of repeated dataset.
23
+ The length of repeated dataset will be `times` larger than the original
24
+ dataset. This is useful when the data loading time is long but the dataset
25
+ is small. Using RepeatDataset can reduce the data loading time between
26
+ epochs.
27
+ Args:
28
+ dataset (:obj:`Dataset`): The dataset to be repeated.
29
+ times (int): Repeat times.
30
+ """
31
+
32
+ def __init__(self, dataset: Dataset, times: int):
33
+ self.dataset = dataset
34
+ self.times = times
35
+
36
+ self._ori_len = len(self.dataset)
37
+
38
+ def __getitem__(self, idx: int):
39
+ return self.dataset[idx % self._ori_len]
40
+
41
+ def __len__(self):
42
+ return self.times * self._ori_len
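A sketch of how these wrappers are reached through build_dataset in builder.py above; the inner dataset config fields are hypothetical placeholders, not a documented MotionVerse signature:

from mogen.datasets import build_dataset

# Hypothetical inner dataset config; the real fields depend on the dataset class.
inner = dict(type='MotionVerse', data_prefix='data/motionverse', pipeline=[])
# 'RepeatDataset' configs are unwrapped explicitly by build_dataset ...
repeated = build_dataset(dict(type='RepeatDataset', dataset=inner, times=10))
# ... and a list/tuple of configs is concatenated into a ConcatDataset.
combined = build_dataset([inner, inner])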
mogen/datasets/human_body_prior/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.01.02
mogen/datasets/human_body_prior/body_model/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.01.02
mogen/datasets/human_body_prior/body_model/body_model.py ADDED
@@ -0,0 +1,281 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.12.13
23
+
24
+ import numpy as np
25
+
26
+ import torch
27
+ import torch.nn as nn
28
+
29
+ # from smplx.lbs import lbs
30
+ from .lbs import lbs
31
+ import sys
32
+
33
+ class BodyModel(nn.Module):
34
+
35
+ def __init__(self,
36
+ bm_fname,
37
+ num_betas=10,
38
+ num_dmpls=None, dmpl_fname=None,
39
+ num_expressions=80,
40
+ use_posedirs=True,
41
+ dtype=torch.float32,
42
+ persistant_buffer=False):
43
+
44
+ super(BodyModel, self).__init__()
45
+
46
+ '''
47
+ :param bm_fname: path to a SMPL-family body model as an .npz file
48
+ :param num_betas: number of shape parameters to include.
49
+ :param num_dmpls: number of DMPL (dynamic soft-tissue) components to use, if any
50
+ :param dtype: float precision of the computations
51
+ :return: verts, trans, pose, betas
52
+ '''
53
+
54
+ self.dtype = dtype
55
+
56
+
57
+ # -- Load SMPL params --
58
+ if '.npz' in bm_fname:
59
+ smpl_dict = np.load(bm_fname, encoding='latin1')
60
+ else:
61
+ raise ValueError('bm_fname should be a .npz file')
62
+
63
+ # kept here for convenient lookup later
64
+ self.num_betas = num_betas
65
+ self.num_dmpls = num_dmpls
66
+ self.num_expressions = num_expressions
67
+
68
+ njoints = smpl_dict['posedirs'].shape[2] // 3
69
+ self.model_type = {69: 'smpl', 153: 'smplh', 162: 'smplx', 45: 'mano', 105: 'animal_horse', 102: 'animal_dog', }[njoints]
70
+
71
+ assert self.model_type in ['smpl', 'smplh', 'smplx', 'mano', 'animal_horse', 'animal_dog'], ValueError(
72
+ 'model_type should be one of smpl/smplh/smplx/mano/animal_horse/animal_dog.')
73
+
74
+ self.use_dmpl = False
75
+ if num_dmpls is not None:
76
+ if dmpl_fname is not None:
77
+ self.use_dmpl = True
78
+ else:
79
+ raise (ValueError('dmpl_fname should be provided when using dmpls!'))
80
+
81
+ if self.use_dmpl and self.model_type in ['smplx', 'mano', 'animal_horse', 'animal_dog']: raise (
82
+ NotImplementedError('DMPLs only work with SMPL/SMPLH models for now.'))
83
+
84
+ # Mean template vertices
85
+ self.comp_register('init_v_template', torch.tensor(smpl_dict['v_template'][None], dtype=dtype), persistent=persistant_buffer)
86
+
87
+ self.comp_register('f', torch.tensor(smpl_dict['f'].astype(np.int32), dtype=torch.int32), persistent=persistant_buffer)
88
+
89
+ num_total_betas = smpl_dict['shapedirs'].shape[-1]
90
+ if num_betas < 1:
91
+ num_betas = num_total_betas
92
+
93
+ shapedirs = smpl_dict['shapedirs'][:, :, :num_betas]
94
+ self.comp_register('shapedirs', torch.tensor(shapedirs, dtype=dtype), persistent=persistant_buffer)
95
+
96
+ if self.model_type == 'smplx':
97
+ if smpl_dict['shapedirs'].shape[-1] > 300:
98
+ begin_shape_id = 300
99
+ else:
100
+ begin_shape_id = 10
101
+ num_expressions = smpl_dict['shapedirs'].shape[-1] - 10
102
+
103
+ exprdirs = smpl_dict['shapedirs'][:, :, begin_shape_id:(begin_shape_id + num_expressions)]
104
+ self.comp_register('exprdirs', torch.tensor(exprdirs, dtype=dtype), persistent=persistant_buffer)
105
+
106
+ expression = torch.tensor(np.zeros((1, num_expressions)), dtype=dtype)
107
+ self.comp_register('init_expression', expression, persistent=persistant_buffer)
108
+
109
+ if self.use_dmpl:
110
+ dmpldirs = np.load(dmpl_fname)['eigvec']
111
+
112
+ dmpldirs = dmpldirs[:, :, :num_dmpls]
113
+ self.comp_register('dmpldirs', torch.tensor(dmpldirs, dtype=dtype), persistent=persistant_buffer)
114
+
115
+ # Regressor for joint locations given shape - 6890 x 24
116
+ self.comp_register('J_regressor', torch.tensor(smpl_dict['J_regressor'], dtype=dtype), persistent=persistant_buffer)
117
+
118
+ # Pose blend shape basis: 6890 x 3 x 207, reshaped to 6890*3 x 207
119
+ if use_posedirs:
120
+ posedirs = smpl_dict['posedirs']
121
+ posedirs = posedirs.reshape([posedirs.shape[0] * 3, -1]).T
122
+ self.comp_register('posedirs', torch.tensor(posedirs, dtype=dtype), persistent=persistant_buffer)
123
+ else:
124
+ self.posedirs = None
125
+
126
+ # indices of parents for each joints
127
+ kintree_table = smpl_dict['kintree_table'].astype(np.int32)
128
+ self.comp_register('kintree_table', torch.tensor(kintree_table, dtype=torch.int32), persistent=persistant_buffer)
129
+
130
+ # LBS weights
131
+ # weights = np.repeat(smpl_dict['weights'][np.newaxis], batch_size, axis=0)
132
+ weights = smpl_dict['weights']
133
+ self.comp_register('weights', torch.tensor(weights, dtype=dtype), persistent=persistant_buffer)
134
+
135
+ self.comp_register('init_trans', torch.zeros((1,3), dtype=dtype), persistent=persistant_buffer)
136
+ # self.register_parameter('trans', nn.Parameter(trans, requires_grad=True))
137
+
138
+ # root_orient
139
+ # if self.model_type in ['smpl', 'smplh']:
140
+ self.comp_register('init_root_orient', torch.zeros((1,3), dtype=dtype), persistent=persistant_buffer)
141
+
142
+ # pose_body
143
+ if self.model_type in ['smpl', 'smplh', 'smplx']:
144
+ self.comp_register('init_pose_body', torch.zeros((1,63), dtype=dtype), persistent=persistant_buffer)
145
+ elif self.model_type == 'animal_horse':
146
+ self.comp_register('init_pose_body', torch.zeros((1,105), dtype=dtype), persistent=persistant_buffer)
147
+ elif self.model_type == 'animal_dog':
148
+ self.comp_register('init_pose_body', torch.zeros((1,102), dtype=dtype), persistent=persistant_buffer)
149
+
150
+ # pose_hand
151
+ if self.model_type in ['smpl']:
152
+ self.comp_register('init_pose_hand', torch.zeros((1,1*3*2), dtype=dtype), persistent=persistant_buffer)
153
+ elif self.model_type in ['smplh', 'smplx']:
154
+ self.comp_register('init_pose_hand', torch.zeros((1,15*3*2), dtype=dtype), persistent=persistant_buffer)
155
+ elif self.model_type in ['mano']:
156
+ self.comp_register('init_pose_hand', torch.zeros((1,15*3), dtype=dtype), persistent=persistant_buffer)
157
+
158
+ # face poses
159
+ if self.model_type == 'smplx':
160
+ self.comp_register('init_pose_jaw', torch.zeros((1,1*3), dtype=dtype), persistent=persistant_buffer)
161
+ self.comp_register('init_pose_eye', torch.zeros((1,2*3), dtype=dtype), persistent=persistant_buffer)
162
+
163
+ self.comp_register('init_betas', torch.zeros((1,num_betas), dtype=dtype), persistent=persistant_buffer)
164
+
165
+ if self.use_dmpl:
166
+ self.comp_register('init_dmpls', torch.zeros((1,num_dmpls), dtype=dtype), persistent=persistant_buffer)
167
+
168
+ def comp_register(self, name, value, persistent=False):
169
+ if sys.version_info[0] > 2:
170
+ self.register_buffer(name, value, persistent)
171
+ else:
172
+ self.register_buffer(name, value)
173
+
174
+ def r(self):
175
+ from human_body_prior.tools.omni_tools import copy2cpu as c2c
176
+ return c2c(self.forward().v)
177
+
178
+ def forward(self, root_orient=None, pose_body=None, pose_hand=None, pose_jaw=None, pose_eye=None, betas=None,
179
+ trans=None, dmpls=None, expression=None, v_template =None, joints=None, v_shaped=None, return_dict=False, **kwargs):
180
+ '''
181
+
182
+ :param root_orient: Nx3
183
+ :param pose_body:
184
+ :param pose_hand:
185
+ :param pose_jaw:
186
+ :param pose_eye:
187
+ :param kwargs:
188
+ :return:
189
+ '''
190
+ batch_size = 1
191
+ # compute batchsize by any of the provided variables
192
+ for arg in [root_orient,pose_body,pose_hand,pose_jaw,pose_eye,betas,trans, dmpls,expression, v_template,joints]:
193
+ if arg is not None:
194
+ batch_size = arg.shape[0]
195
+ break
196
+
197
+ # assert not (v_template is not None and betas is not None), ValueError('vtemplate and betas could not be used jointly.')
198
+ assert self.model_type in ['smpl', 'smplh', 'smplx', 'mano', 'animal_horse', 'animal_dog'], ValueError(
199
+ 'model_type should be in smpl/smplh/smplx/mano')
200
+ if root_orient is None: root_orient = self.init_root_orient.expand(batch_size, -1)
201
+ if self.model_type in ['smplh', 'smpl']:
202
+ if pose_body is None: pose_body = self.init_pose_body.expand(batch_size, -1)
203
+ if pose_hand is None: pose_hand = self.init_pose_hand.expand(batch_size, -1)
204
+ elif self.model_type == 'smplx':
205
+ if pose_body is None: pose_body = self.init_pose_body.expand(batch_size, -1)
206
+ if pose_hand is None: pose_hand = self.init_pose_hand.expand(batch_size, -1)
207
+ if pose_jaw is None: pose_jaw = self.init_pose_jaw.expand(batch_size, -1)
208
+ if pose_eye is None: pose_eye = self.init_pose_eye.expand(batch_size, -1)
209
+ elif self.model_type in ['mano',]:
210
+ if pose_hand is None: pose_hand = self.init_pose_hand.expand(batch_size, -1)
211
+ elif self.model_type in ['animal_horse','animal_dog']:
212
+ if pose_body is None: pose_body = self.init_pose_body.expand(batch_size, -1)
213
+
214
+ if pose_hand is None and self.model_type not in ['animal_horse', 'animal_dog']: pose_hand = self.init_pose_hand.expand(batch_size, -1)
215
+
216
+ if trans is None: trans = self.init_trans.expand(batch_size, -1)
217
+ if v_template is None: v_template = self.init_v_template.expand(batch_size, -1,-1)
218
+ if betas is None: betas = self.init_betas.expand(batch_size, -1)
219
+
220
+ if self.model_type in ['smplh', 'smpl']:
221
+ full_pose = torch.cat([root_orient, pose_body, pose_hand], dim=-1)
222
+ elif self.model_type == 'smplx':
223
+ full_pose = torch.cat([root_orient, pose_body, pose_jaw, pose_eye, pose_hand], dim=-1) # orient:3, body:63, jaw:3, eyel:3, eyer:3, handl, handr
224
+ elif self.model_type in ['mano', ]:
225
+ full_pose = torch.cat([root_orient, pose_hand], dim=-1)
226
+ elif self.model_type in ['animal_horse', 'animal_dog']:
227
+ full_pose = torch.cat([root_orient, pose_body], dim=-1)
228
+
229
+ if self.use_dmpl:
230
+ if dmpls is None: dmpls = self.init_dmpls.expand(batch_size, -1)
231
+ shape_components = torch.cat([betas, dmpls], dim=-1)
232
+ shapedirs = torch.cat([self.shapedirs, self.dmpldirs], dim=-1)
233
+ elif self.model_type == 'smplx':
234
+ if expression is None: expression = self.init_expression.expand(batch_size, -1)
235
+ shape_components = torch.cat([betas, expression], dim=-1)
236
+ shapedirs = torch.cat([self.shapedirs, self.exprdirs], dim=-1)
237
+ else:
238
+ shape_components = betas
239
+ shapedirs = self.shapedirs
240
+
241
+ verts, Jtr = lbs(betas=shape_components, pose=full_pose, v_template=v_template,
242
+ shapedirs=shapedirs, posedirs=self.posedirs,
243
+ J_regressor=self.J_regressor, parents=self.kintree_table[0].long(),
244
+ lbs_weights=self.weights, joints=joints, v_shaped=v_shaped,
245
+ dtype=self.dtype)
246
+
247
+ Jtr = Jtr + trans.unsqueeze(dim=1)
248
+ verts = verts + trans.unsqueeze(dim=1)
249
+
250
+ res = {}
251
+ res['v'] = verts
252
+ res['f'] = self.f
253
+ res['Jtr'] = Jtr # Todo: ik can be made with vposer
254
+ # res['bStree_table'] = self.kintree_table
255
+
256
+ # if self.model_type == 'smpl':
257
+ # res['pose_body'] = pose_body
258
+ # elif self.model_type == 'smplh':
259
+ # res['pose_body'] = pose_body
260
+ # res['pose_hand'] = pose_hand
261
+ # elif self.model_type == 'smplx':
262
+ # res['pose_body'] = pose_body
263
+ # res['pose_hand'] = pose_hand
264
+ # res['pose_jaw'] = pose_jaw
265
+ # res['pose_eye'] = pose_eye
266
+ # elif self.model_type in ['mano', 'mano']:
267
+ # res['pose_hand'] = pose_hand
268
+ res['full_pose'] = full_pose
269
+
270
+ if not return_dict:
271
+ class result_meta(object):
272
+ pass
273
+
274
+ res_class = result_meta()
275
+ for k, v in res.items():
276
+ res_class.__setattr__(k, v)
277
+ res = res_class
278
+
279
+ return res
280
+
281
+
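A minimal usage sketch for the BodyModel wrapper above (not part of the commit); the .npz path below is a placeholder for any SMPL/SMPL-H/SMPL-X model file obtained separately.

import torch
from mogen.datasets.human_body_prior.body_model.body_model import BodyModel

# hypothetical model path; the body model file must be downloaded separately
bm = BodyModel(bm_fname='body_models/smplh/neutral/model.npz', num_betas=10)
pose_body = torch.zeros(1, 63)     # axis-angle pose for the 21 body joints
betas = torch.zeros(1, 10)         # shape coefficients
out = bm(pose_body=pose_body, betas=betas)
print(out.v.shape, out.Jtr.shape)  # posed vertices (1, V, 3) and joints (1, J, 3)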
mogen/datasets/human_body_prior/body_model/lbs.py ADDED
@@ -0,0 +1,404 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Vassilis Choutas <https://vchoutas.github.io/>
21
+ #
22
+
23
+ from __future__ import absolute_import
24
+ from __future__ import print_function
25
+ from __future__ import division
26
+
27
+ import numpy as np
28
+
29
+ import torch
30
+ import torch.nn.functional as F
31
+
32
+ def to_tensor(array, dtype=torch.float32):
33
+ if 'torch.tensor' not in str(type(array)):
34
+ return torch.tensor(array, dtype=dtype)
+ return array  # already a torch tensor; return it unchanged
35
+
36
+
37
+ class Struct(object):
38
+ def __init__(self, **kwargs):
39
+ for key, val in kwargs.items():
40
+ setattr(self, key, val)
41
+
42
+
43
+ def to_np(array, dtype=np.float32):
44
+ if 'scipy.sparse' in str(type(array)):
45
+ array = array.todense()
46
+ return np.array(array, dtype=dtype)
47
+
48
+
49
+ def rot_mat_to_euler(rot_mats):
50
+ # Calculates rotation matrix to euler angles
51
+ # Careful for extreme cases of eular angles like [0.0, pi, 0.0]
52
+
53
+ sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] +
54
+ rot_mats[:, 1, 0] * rot_mats[:, 1, 0])
55
+ return torch.atan2(-rot_mats[:, 2, 0], sy)
56
+
57
+
58
+ def find_dynamic_lmk_idx_and_bcoords(vertices, pose, dynamic_lmk_faces_idx,
59
+ dynamic_lmk_b_coords,
60
+ neck_kin_chain, dtype=torch.float32):
61
+ ''' Compute the faces, barycentric coordinates for the dynamic landmarks
62
+
63
+
64
+ To do so, we first compute the rotation of the neck around the y-axis
65
+ and then use a pre-computed look-up table to find the faces and the
66
+ barycentric coordinates that will be used.
67
+
68
+ Special thanks to Soubhik Sanyal ([email protected])
69
+ for providing the original TensorFlow implementation and for the LUT.
70
+
71
+ Parameters
72
+ ----------
73
+ vertices: torch.tensor BxVx3, dtype = torch.float32
74
+ The tensor of input vertices
75
+ pose: torch.tensor Bx(Jx3), dtype = torch.float32
76
+ The current pose of the body model
77
+ dynamic_lmk_faces_idx: torch.tensor L, dtype = torch.long
78
+ The look-up table from neck rotation to faces
79
+ dynamic_lmk_b_coords: torch.tensor Lx3, dtype = torch.float32
80
+ The look-up table from neck rotation to barycentric coordinates
81
+ neck_kin_chain: list
82
+ A python list that contains the indices of the joints that form the
83
+ kinematic chain of the neck.
84
+ dtype: torch.dtype, optional
85
+
86
+ Returns
87
+ -------
88
+ dyn_lmk_faces_idx: torch.tensor, dtype = torch.long
89
+ A tensor of size BxL that contains the indices of the faces that
90
+ will be used to compute the current dynamic landmarks.
91
+ dyn_lmk_b_coords: torch.tensor, dtype = torch.float32
92
+ A tensor of size BxL that contains the barycentric coordinates that
93
+ will be used to compute the current dynamic landmarks.
94
+ '''
95
+
96
+ batch_size = vertices.shape[0]
97
+
98
+ aa_pose = torch.index_select(pose.view(batch_size, -1, 3), 1,
99
+ neck_kin_chain)
100
+ rot_mats = batch_rodrigues(
101
+ aa_pose.view(-1, 3), dtype=dtype).view(batch_size, -1, 3, 3)
102
+
103
+ rel_rot_mat = torch.eye(3, device=vertices.device,
104
+ dtype=dtype).unsqueeze_(dim=0)
105
+ for idx in range(len(neck_kin_chain)):
106
+ rel_rot_mat = torch.bmm(rot_mats[:, idx], rel_rot_mat)
107
+
108
+ y_rot_angle = torch.round(
109
+ torch.clamp(-rot_mat_to_euler(rel_rot_mat) * 180.0 / np.pi,
110
+ max=39)).to(dtype=torch.long)
111
+ neg_mask = y_rot_angle.lt(0).to(dtype=torch.long)
112
+ mask = y_rot_angle.lt(-39).to(dtype=torch.long)
113
+ neg_vals = mask * 78 + (1 - mask) * (39 - y_rot_angle)
114
+ y_rot_angle = (neg_mask * neg_vals +
115
+ (1 - neg_mask) * y_rot_angle)
116
+
117
+ dyn_lmk_faces_idx = torch.index_select(dynamic_lmk_faces_idx,
118
+ 0, y_rot_angle)
119
+ dyn_lmk_b_coords = torch.index_select(dynamic_lmk_b_coords,
120
+ 0, y_rot_angle)
121
+
122
+ return dyn_lmk_faces_idx, dyn_lmk_b_coords
123
+
124
+
125
+ def vertices2landmarks(vertices, faces, lmk_faces_idx, lmk_bary_coords):
126
+ ''' Calculates landmarks by barycentric interpolation
127
+
128
+ Parameters
129
+ ----------
130
+ vertices: torch.tensor BxVx3, dtype = torch.float32
131
+ The tensor of input vertices
132
+ faces: torch.tensor Fx3, dtype = torch.long
133
+ The faces of the mesh
134
+ lmk_faces_idx: torch.tensor L, dtype = torch.long
135
+ The tensor with the indices of the faces used to calculate the
136
+ landmarks.
137
+ lmk_bary_coords: torch.tensor Lx3, dtype = torch.float32
138
+ The tensor of barycentric coordinates that are used to interpolate
139
+ the landmarks
140
+
141
+ Returns
142
+ -------
143
+ landmarks: torch.tensor BxLx3, dtype = torch.float32
144
+ The coordinates of the landmarks for each mesh in the batch
145
+ '''
146
+ # Extract the indices of the vertices for each face
147
+ # BxLx3
148
+ batch_size, num_verts = vertices.shape[:2]
149
+ device = vertices.device
150
+
151
+ lmk_faces = torch.index_select(faces, 0, lmk_faces_idx.view(-1)).view(
152
+ batch_size, -1, 3)
153
+
154
+ lmk_faces += torch.arange(
155
+ batch_size, dtype=torch.long, device=device).view(-1, 1, 1) * num_verts
156
+
157
+ lmk_vertices = vertices.view(-1, 3)[lmk_faces].view(
158
+ batch_size, -1, 3, 3)
159
+
160
+ landmarks = torch.einsum('blfi,blf->bli', [lmk_vertices, lmk_bary_coords])
161
+ return landmarks
162
+
163
+
164
+ def lbs(betas, pose, v_template, shapedirs, posedirs, J_regressor, parents,
165
+ lbs_weights, joints = None, pose2rot=True, v_shaped=None, dtype=torch.float32):
166
+ ''' Performs Linear Blend Skinning with the given shape and pose parameters
167
+
168
+ Parameters
169
+ ----------
170
+ betas : torch.tensor BxNB
171
+ The tensor of shape parameters
172
+ pose : torch.tensor Bx(J + 1) * 3
173
+ The pose parameters in axis-angle format
174
+ v_template torch.tensor BxVx3
175
+ The template mesh that will be deformed
176
+ shapedirs : torch.tensor 1xNB
177
+ The tensor of PCA shape displacements
178
+ posedirs : torch.tensor Px(V * 3)
179
+ The pose PCA coefficients
180
+ J_regressor : torch.tensor JxV
181
+ The regressor array that is used to calculate the joints from
182
+ the position of the vertices
183
+ parents: torch.tensor J
184
+ The array that describes the kinematic tree for the model
185
+ lbs_weights: torch.tensor N x V x (J + 1)
186
+ The linear blend skinning weights that represent how much the
187
+ rotation matrix of each part affects each vertex
188
+ pose2rot: bool, optional
189
+ Flag on whether to convert the input pose tensor to rotation
190
+ matrices. The default value is True. If False, then the pose tensor
191
+ should already contain rotation matrices and have a size of
192
+ Bx(J + 1)x9
193
+ dtype: torch.dtype, optional
194
+
195
+ Returns
196
+ -------
197
+ verts: torch.tensor BxVx3
198
+ The vertices of the mesh after applying the shape and pose
199
+ displacements.
200
+ joints: torch.tensor BxJx3
201
+ The joints of the model
202
+ '''
203
+
204
+ batch_size = max(betas.shape[0], pose.shape[0])
205
+ device = betas.device
206
+
207
+ # Add shape contribution
208
+ if v_shaped is None:
209
+ v_shaped = v_template + blend_shapes(betas, shapedirs)
210
+
211
+ # Get the joints
212
+ # NxJx3 array
213
+ if joints is not None:
214
+ J = joints
215
+ else:
216
+ J = vertices2joints(J_regressor, v_shaped)
217
+
218
+ # 3. Add pose blend shapes
219
+ # N x J x 3 x 3
220
+ ident = torch.eye(3, dtype=dtype, device=device)
221
+ if pose2rot:
222
+ rot_mats = batch_rodrigues(
223
+ pose.view(-1, 3), dtype=dtype).view([batch_size, -1, 3, 3])
224
+
225
+ pose_feature = (rot_mats[:, 1:, :, :] - ident).view([batch_size, -1])
226
+ # (N x P) x (P, V * 3) -> N x V x 3
227
+ pose_offsets = torch.matmul(pose_feature, posedirs).view(batch_size, -1, 3)
228
+ else:
229
+ pose_feature = pose[:, 1:].view(batch_size, -1, 3, 3) - ident
230
+ rot_mats = pose.view(batch_size, -1, 3, 3)
231
+
232
+ pose_offsets = torch.matmul(pose_feature.view(batch_size, -1),
233
+ posedirs).view(batch_size, -1, 3)
234
+
235
+ v_posed = pose_offsets + v_shaped
236
+ # 4. Get the global joint location
237
+ J_transformed, A = batch_rigid_transform(rot_mats, J, parents, dtype=dtype)
238
+
239
+ # 5. Do skinning:
240
+ # W is N x V x (J + 1)
241
+ W = lbs_weights.unsqueeze(dim=0).expand([batch_size, -1, -1])
242
+ # (N x V x (J + 1)) x (N x (J + 1) x 16)
243
+ num_joints = J_regressor.shape[0]
244
+ T = torch.matmul(W, A.view(batch_size, num_joints, 16)) \
245
+ .view(batch_size, -1, 4, 4)
246
+
247
+ homogen_coord = torch.ones([batch_size, v_posed.shape[1], 1],
248
+ dtype=dtype, device=device)
249
+ v_posed_homo = torch.cat([v_posed, homogen_coord], dim=2)
250
+ v_homo = torch.matmul(T, torch.unsqueeze(v_posed_homo, dim=-1))
251
+
252
+ verts = v_homo[:, :, :3, 0]
253
+
254
+ return verts, J_transformed
255
+
256
+
257
+ def vertices2joints(J_regressor, vertices):
258
+ ''' Calculates the 3D joint locations from the vertices
259
+
260
+ Parameters
261
+ ----------
262
+ J_regressor : torch.tensor JxV
263
+ The regressor array that is used to calculate the joints from the
264
+ position of the vertices
265
+ vertices : torch.tensor BxVx3
266
+ The tensor of mesh vertices
267
+
268
+ Returns
269
+ -------
270
+ torch.tensor BxJx3
271
+ The location of the joints
272
+ '''
273
+
274
+ return torch.einsum('bik,ji->bjk', [vertices, J_regressor])
275
+
276
+
277
+ def blend_shapes(betas, shape_disps):
278
+ ''' Calculates the per vertex displacement due to the blend shapes
279
+
280
+
281
+ Parameters
282
+ ----------
283
+ betas : torch.tensor Bx(num_betas)
284
+ Blend shape coefficients
285
+ shape_disps: torch.tensor Vx3x(num_betas)
286
+ Blend shapes
287
+
288
+ Returns
289
+ -------
290
+ torch.tensor BxVx3
291
+ The per-vertex displacement due to shape deformation
292
+ '''
293
+
294
+ # Displacement[b, m, k] = sum_{l} betas[b, l] * shape_disps[m, k, l]
295
+ # i.e. Multiply each shape displacement by its corresponding beta and
296
+ # then sum them.
297
+
298
+ #print(betas.device,shape_disps.device)
299
+ blend_shape = torch.einsum('bl,mkl->bmk', [betas, shape_disps])
300
+ return blend_shape
301
+
302
+
303
+ def batch_rodrigues(rot_vecs, epsilon=1e-8, dtype=torch.float32):
304
+ ''' Calculates the rotation matrices for a batch of rotation vectors
305
+ Parameters
306
+ ----------
307
+ rot_vecs: torch.tensor Nx3
308
+ array of N axis-angle vectors
309
+ Returns
310
+ -------
311
+ R: torch.tensor Nx3x3
312
+ The rotation matrices for the given axis-angle parameters
313
+ '''
314
+
315
+ batch_size = rot_vecs.shape[0]
316
+ device = rot_vecs.device
317
+
318
+ angle = torch.norm(rot_vecs + 1e-8, dim=1, keepdim=True)
319
+ rot_dir = rot_vecs / angle
320
+
321
+ cos = torch.unsqueeze(torch.cos(angle), dim=1)
322
+ sin = torch.unsqueeze(torch.sin(angle), dim=1)
323
+
324
+ # Bx1 arrays
325
+ rx, ry, rz = torch.split(rot_dir, 1, dim=1)
326
+ K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device)
327
+
328
+ zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device)
329
+ K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \
330
+ .view((batch_size, 3, 3))
331
+
332
+ ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0)
333
+ rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K)
334
+ return rot_mat
335
+
336
+
337
+ def transform_mat(R, t):
338
+ ''' Creates a batch of transformation matrices
339
+ Args:
340
+ - R: Bx3x3 array of a batch of rotation matrices
341
+ - t: Bx3x1 array of a batch of translation vectors
342
+ Returns:
343
+ - T: Bx4x4 Transformation matrix
344
+ '''
345
+ # No padding left or right, only add an extra row
346
+ return torch.cat([F.pad(R, [0, 0, 0, 1]),
347
+ F.pad(t, [0, 0, 0, 1], value=1)], dim=2)
348
+
349
+
350
+ def batch_rigid_transform(rot_mats, joints, parents, dtype=torch.float32):
351
+ """
352
+ Applies a batch of rigid transformations to the joints
353
+
354
+ Parameters
355
+ ----------
356
+ rot_mats : torch.tensor BxNx3x3
357
+ Tensor of rotation matrices
358
+ joints : torch.tensor BxNx3
359
+ Locations of joints
360
+ parents : torch.tensor BxN
361
+ The kinematic tree of each object
362
+ dtype : torch.dtype, optional:
363
+ The data type of the created tensors, the default is torch.float32
364
+
365
+ Returns
366
+ -------
367
+ posed_joints : torch.tensor BxNx3
368
+ The locations of the joints after applying the pose rotations
369
+ rel_transforms : torch.tensor BxNx4x4
370
+ The relative (with respect to the root joint) rigid transformations
371
+ for all the joints
372
+ """
373
+
374
+ joints = torch.unsqueeze(joints, dim=-1)
375
+
376
+ rel_joints = joints.clone()
377
+ rel_joints[:, 1:] -= joints[:, parents[1:]]
378
+
379
+ transforms_mat = transform_mat(
380
+ rot_mats.reshape(-1, 3, 3),
381
+ rel_joints.reshape(-1, 3, 1)).reshape(-1, joints.shape[1], 4, 4)
382
+
383
+ transform_chain = [transforms_mat[:, 0]]
384
+ for i in range(1, parents.shape[0]):
385
+ # Subtract the joint location at the rest pose
386
+ # No need for rotation, since it's identity when at rest
387
+ curr_res = torch.matmul(transform_chain[parents[i]],
388
+ transforms_mat[:, i])
389
+ transform_chain.append(curr_res)
390
+
391
+ transforms = torch.stack(transform_chain, dim=1)
392
+
393
+ # The last column of the transformations contains the posed joints
394
+ posed_joints = transforms[:, :, :3, 3]
395
+
398
+
399
+ joints_homogen = F.pad(joints, [0, 0, 0, 1])
400
+
401
+ rel_transforms = transforms - F.pad(
402
+ torch.matmul(transforms, joints_homogen), [3, 0, 0, 0, 0, 0, 0, 0])
403
+
404
+ return posed_joints, rel_transforms
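A quick sanity-check sketch for batch_rodrigues from this file, assuming the module is importable from the vendored path:

import torch
from mogen.datasets.human_body_prior.body_model.lbs import batch_rodrigues

aa = torch.tensor([[0.0, 0.0, 1.5708]])   # ~90 degrees about z, axis-angle
R = batch_rodrigues(aa)                   # (1, 3, 3) rotation matrix
print(torch.allclose(R @ R.transpose(1, 2),
                     torch.eye(3).unsqueeze(0), atol=1e-5))  # True: R is orthonormal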
mogen/datasets/human_body_prior/body_model/parts_segm/readme ADDED
@@ -0,0 +1 @@
1
+ ### Parts segmentation file obtained from https://github.com/vchoutas/torch-mesh-isect#examples and put here for convenience
mogen/datasets/human_body_prior/body_model/rigid_object_model.py ADDED
@@ -0,0 +1,67 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.12.13
23
+
24
+ import numpy as np
25
+
26
+ import torch
27
+ import torch.nn as nn
28
+
29
+ # from smplx.lbs import lbs
30
+ from human_body_prior.body_model.lbs import lbs
31
+ # import trimesh  # don't use this package for loading meshes since it messes up the order of vertices
32
+ from psbody.mesh import Mesh
33
+ from human_body_prior.body_model.lbs import batch_rodrigues
34
+
35
+ class RigidObjectModel(nn.Module):
36
+
37
+ def __init__(self, plpath, batch_size=1, dtype=torch.float32):
38
+ super(RigidObjectModel, self).__init__()
39
+
40
+ trans = torch.tensor(np.zeros((batch_size, 3)), dtype=dtype, requires_grad=True)
41
+ self.register_parameter('trans', nn.Parameter(trans, requires_grad=True))
42
+
43
+ root_orient = torch.tensor(np.zeros((batch_size, 3)), dtype=dtype, requires_grad=True)
44
+ self.register_parameter('root_orient', nn.Parameter(root_orient, requires_grad=True))
45
+
46
+ mesh = Mesh(filename=plpath)
47
+
48
+ self.rigid_v = torch.from_numpy(np.repeat(mesh.v[np.newaxis], batch_size, axis=0)).type(dtype)
49
+ self.f = torch.from_numpy(mesh.f.astype(np.int32))
50
+
51
+ def forward(self, root_orient, trans):
52
+ if root_orient is None: root_orient = self.root_orient
53
+ if trans is None: trans = self.trans
54
+ verts = torch.bmm(self.rigid_v, batch_rodrigues(root_orient)) + trans.view(-1,1,3)
55
+
56
+ res = {}
57
+ res['v'] = verts
58
+ res['f'] = self.f
59
+
60
+ class result_meta(object): pass
61
+
62
+ res_class = result_meta()
63
+ for k, v in res.items():
64
+ res_class.__setattr__(k, v)
65
+ res = res_class
66
+
67
+ return res
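A usage sketch, assuming the optional psbody-mesh dependency is installed; 'object.ply' is a placeholder mesh path:

import torch
from mogen.datasets.human_body_prior.body_model.rigid_object_model import RigidObjectModel

rigid = RigidObjectModel(plpath='object.ply', batch_size=1)
out = rigid(root_orient=torch.zeros(1, 3), trans=torch.zeros(1, 3))
print(out.v.shape)  # rotated and translated vertices, (1, V, 3)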
mogen/datasets/human_body_prior/models/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
mogen/datasets/human_body_prior/models/ik_engine.py ADDED
@@ -0,0 +1,287 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2021.02.12
23
+
24
+ from typing import List, Dict
25
+
26
+ from psbody.mesh import Mesh
27
+ from body_visualizer.tools.psbody_mesh_tools import rotateXYZ, points_to_cubes, points_to_spheres
28
+
29
+
30
+ from torch import nn
31
+ import torch
32
+
33
+ from human_body_prior.tools.model_loader import load_model
34
+
35
+ import numpy as np
36
+
37
+ from body_visualizer.tools.vis_tools import colors
38
+ from human_body_prior.tools.omni_tools import copy2cpu as c2c
39
+ from psbody.mesh import MeshViewers
40
+
41
+ from human_body_prior.tools.omni_tools import log2file
42
+
43
+ from human_body_prior.models.vposer_model import VPoser
44
+ from human_body_prior.tools.omni_tools import flatten_list
45
+
46
+
47
+ def visualize(points, bm_f, mvs, kpts_colors, verbosity=2, logger=None):
48
+ from human_body_prior.tools.omni_tools import log2file
49
+
50
+ if logger is None: logger = log2file()
51
+
52
+ def view(opt_objs, body_v, virtual_markers, opt_it):
53
+ if verbosity <= 0: return
54
+ opt_objs_cpu = {k: c2c(v) for k, v in opt_objs.items()}
55
+
56
+ total_loss = np.sum([np.sum(v) for k, v in opt_objs_cpu.items()])
57
+ message = 'it {} -- [total loss = {:.2e}] - {}'.format(opt_it, total_loss, ' | '.join(['%s = %2.2e' % (k, np.sum(v)) for k, v in opt_objs_cpu.items()]))
58
+ logger(message)
59
+ if verbosity>1:
60
+ bs = body_v.shape[0]
61
+ np.random.seed(100)
62
+ frame_ids = list(range(bs)) if bs <= len(mvs) else np.random.choice(bs , size=len(mvs), replace=False).tolist()
63
+ if bs > len(mvs): message += ' -- [frame_ids: {}]'.format(frame_ids)
64
+ for dispId, fId in enumerate(frame_ids): # check for the number of frames in mvs and show a randomly picked number of frames in body if there is more to show than row*cols available
65
+ new_body_v = rotateXYZ(body_v[fId], [-90,0,0])
66
+
67
+ orig_mrk_mesh = points_to_spheres(rotateXYZ(c2c(points[fId]), [-90,0,0]), radius=0.01, color=kpts_colors)
68
+ virtual_markers_mesh = points_to_cubes(rotateXYZ(virtual_markers[fId], [-90,0,0]), radius=0.01, color=kpts_colors)
69
+ new_body_mesh = Mesh(new_body_v, bm_f, vc=colors['grey'])
70
+
71
+ # linev = rotateXYZ(np.hstack((c2c(points[fId]), virtual_markers[fId])).reshape((-1, 3)), [-90,0,0])
72
+ # linee = np.arange(len(linev)).reshape((-1, 2))
73
+ # ll = Lines(v=linev, e=linee)
74
+ # ll.vc = (ll.v * 0. + 1) * np.array([0.00, 0.00, 1.00])
75
+ # mvs[dispId].set_dynamic_lines([ll])
76
+
77
+ # orig_mrk_mesh = points_to_spheres(data_pc, radius=0.01, vc=colors['blue'])
78
+ mvs[dispId].set_dynamic_meshes([orig_mrk_mesh, virtual_markers_mesh])
79
+ mvs[dispId].set_static_meshes([new_body_mesh])
80
+
81
+ mvs[0].set_titlebar(message)
82
+ # if out_dir is not None: mv.save_snapshot(os.path.join(out_dir, '%05d_it_%.5d.png' %(frame_id, opt_it)))
83
+ return view
84
+
85
+
86
+ class AdamInClosure():
87
+ def __init__(self, var_list, lr, max_iter=100, tolerance_change=1e-5):
88
+ self.optimizer = torch.optim.Adam(var_list, lr)
89
+ self.max_iter = max_iter
90
+ self.tolerance_change = tolerance_change
91
+
92
+
93
+ def step(self, closure):
94
+ prev_loss = None
95
+ for it in range(self.max_iter):
96
+ loss = closure()
97
+ self.optimizer.step()
98
+ if prev_loss is None:
99
+ prev_loss = loss
100
+ continue
101
+ if torch.isnan(loss):
102
+ # breakpoint()
103
+ break
104
+ if abs(loss - prev_loss) < self.tolerance_change:
105
+ print('abs(loss - prev_loss) < self.tolerance_change')
106
+ break
107
+
108
+ def zero_grad(self):
109
+ self.optimizer.zero_grad()
110
+
111
+ def ik_fit(optimizer, source_kpts_model, static_vars, vp_model, extra_params={}, on_step=None, gstep=0):
112
+
113
+ data_loss = extra_params.get('data_loss', torch.nn.SmoothL1Loss(reduction='mean'))
114
+ # data_loss =
115
+ # data_loss = torch.nn.L1Loss(reduction='mean')#change with SmoothL1
116
+
117
+ def fit(weights, free_vars):
118
+
119
+ fit.gstep += 1
120
+ optimizer.zero_grad()
121
+
122
+ free_vars['pose_body'] = vp_model.decode(free_vars['poZ_body'])['pose_body'].contiguous().view(-1, 63)
123
+ nonan_mask = torch.isnan(free_vars['poZ_body']).sum(-1) == 0
124
+
125
+ opt_objs = {}
126
+
127
+ res = source_kpts_model(free_vars)
128
+
129
+ opt_objs['data'] = data_loss(res['source_kpts'], static_vars['target_kpts'])
130
+
131
+ opt_objs['betas'] = torch.pow(free_vars['betas'][nonan_mask],2).sum()
132
+ opt_objs['poZ_body'] = torch.pow(free_vars['poZ_body'][nonan_mask],2).sum()
133
+
134
+
135
+ opt_objs = {k: opt_objs[k]*v for k, v in weights.items() if k in opt_objs.keys()}
136
+ loss_total = torch.sum(torch.stack(list(opt_objs.values())))
137
+ # breakpoint()
138
+
139
+ loss_total.backward()
140
+
141
+ if on_step is not None:
142
+ on_step(opt_objs, c2c(res['body'].v), c2c(res['source_kpts']), fit.gstep)
143
+
144
+ fit.free_vars = {k:v for k,v in free_vars.items()}# if k in IK_Engine.fields_to_optimize}
145
+ # fit.nonan_mask = nonan_mask
146
+ fit.final_loss = loss_total
147
+
148
+ return loss_total
149
+
150
+ fit.gstep = gstep
151
+ fit.final_loss = None
152
+ fit.free_vars = {}
153
+ # fit.nonan_mask = None
154
+ return fit
155
+
156
+ class IK_Engine(nn.Module):
157
+
158
+
159
+ def __init__(self,
160
+ vposer_expr_dir: str,
161
+ data_loss,
162
+ optimizer_args: dict={'type':'ADAM'},
163
+ stepwise_weights: List[Dict]=[{'data': 10., 'poZ_body': .01, 'betas': .5}],
164
+ display_rc: tuple = (2,1),
165
+ verbosity: int = 1,
166
+ logger=None,
167
+ ):
168
+ '''
169
+
170
+ :param vposer_expr_dir: The vposer directory that holds the settings and model snapshot
171
+ :param data_loss: should be a pytorch callable (source, target) that returns the accumulated loss
172
+ :param optimizer_args: arguments for optimizers
173
+ :param stepwise_weights: list of dictionaries. each list element defines weights for one full step of optimization
174
+ if a weight value is left out, its respective object item will be removed as well. imagine optimizing without data term!
175
+ :param display_rc: number of row and columns in case verbosity > 1
176
+ :param verbosity: 0: silent, 1: text, 2: text/visual. running 2 over ssh would need extra work
177
+ :param logger: an instance of human_body_prior.tools.omni_tools.log2file
178
+ '''
179
+
180
+
181
+ super(IK_Engine, self).__init__()
182
+
183
+ assert isinstance(stepwise_weights, list), ValueError('stepwise_weights should be a list of dictionaries.')
184
+ assert np.all(['data' in l for l in stepwise_weights]), ValueError('The term data should be available in every weight of annealed optimization step: {}'.format(stepwise_weights))
185
+
186
+ self.data_loss = torch.nn.SmoothL1Loss(reduction='mean') if data_loss is None else data_loss
187
+
188
+ self.stepwise_weights = stepwise_weights
189
+ self.verbosity = verbosity
190
+ self.optimizer_args = optimizer_args
191
+
192
+ self.logger = log2file() if logger is None else logger
193
+
194
+
195
+ if verbosity>1:
196
+ mvs = MeshViewers(display_rc, keepalive=True)
197
+ self.mvs = flatten_list(mvs)
198
+ self.mvs[0].set_background_color(colors['white'])
199
+ else:
200
+ self.mvs=None
201
+
202
+ self.vp_model, _ = load_model(vposer_expr_dir,
203
+ model_code=VPoser,
204
+ remove_words_in_model_weights='vp_model.',
205
+ disable_grad=True)
206
+
207
+
208
+ def forward(self, source_kpts, target_kpts, initial_body_params={}):
209
+ '''
210
+ source_kpts is a function that given body parameters computes source key points that should match target key points
211
+ Try to reconstruct the bps signature by optimizing the body_poZ
212
+ '''
213
+ # if self.rt_ps.verbosity > 0: self.logger('Processing {} frames'.format(points.shape[0]))
214
+
215
+ bs = target_kpts.shape[0]
216
+
217
+
218
+ on_step = visualize(target_kpts,
219
+ kpts_colors=source_kpts.kpts_colors,
220
+ bm_f=source_kpts.bm_f,
221
+ mvs=self.mvs,
222
+ verbosity=self.verbosity,
223
+ logger=self.logger)
224
+
225
+ comp_device = target_kpts.device
226
+ # comp_device = self.vp_model.named_parameters().__next__()[1].device
227
+ if 'pose_body' not in initial_body_params:
228
+ initial_body_params['pose_body'] = torch.zeros([bs, 63], device=comp_device, dtype=torch.float, requires_grad=False)
229
+ if 'trans' not in initial_body_params:
230
+ initial_body_params['trans'] = torch.zeros([bs, 3], device=comp_device, dtype=torch.float, requires_grad=False)
231
+ if 'betas' not in initial_body_params:
232
+ initial_body_params['betas'] = torch.zeros([bs, 10], device=comp_device, dtype=torch.float, requires_grad=False)
233
+ if 'root_orient' not in initial_body_params:
234
+ initial_body_params['root_orient'] = torch.zeros([bs, 3], device=comp_device, dtype=torch.float, requires_grad=False)
235
+
236
+ initial_body_params['poZ_body'] = self.vp_model.encode(initial_body_params['pose_body']).mean
237
+
238
+ free_vars = {k: torch.nn.Parameter(v.detach(), requires_grad=True) for k,v in initial_body_params.items() if k in ['betas', 'trans', 'poZ_body', 'root_orient']}
239
+ static_vars = {
240
+ 'target_kpts': target_kpts,
241
+ # 'trans': initial_body_params['trans'].detach(),
242
+ # 'betas': initial_body_params['betas'].detach(),
243
+ # 'poZ_body': initial_body_params['poZ_body'].detach()
244
+ }
245
+
246
+ if self.optimizer_args['type'].upper() == 'LBFGS':
247
+ optimizer = torch.optim.LBFGS(list(free_vars.values()),
248
+ lr=self.optimizer_args.get('lr', 1),
249
+ max_iter=self.optimizer_args.get('max_iter', 100),
250
+ tolerance_change=self.optimizer_args.get('tolerance_change', 1e-5),
251
+ max_eval=self.optimizer_args.get('max_eval', None),
252
+ history_size=self.optimizer_args.get('history_size', 100),
253
+ line_search_fn='strong_wolfe')
254
+
255
+ elif self.optimizer_args['type'].upper() == 'ADAM':
256
+ optimizer = AdamInClosure(list(free_vars.values()),
257
+ lr=self.optimizer_args.get('lr', 1e-3),
258
+ max_iter=self.optimizer_args.get('max_iter', 100),
259
+ tolerance_change=self.optimizer_args.get('tolerance_change', 1e-5),
260
+ )
261
+ else:
262
+ raise ValueError('optimizer_type not recognized.')
263
+
264
+ gstep = 0
265
+ closure = ik_fit(optimizer,
266
+ source_kpts_model=source_kpts,
267
+ static_vars=static_vars,
268
+ vp_model=self.vp_model,
269
+ extra_params={'data_loss': self.data_loss},
270
+ on_step=on_step,
271
+ gstep=gstep)
272
+ # try:
273
+
274
+ for wts in self.stepwise_weights:
275
+ optimizer.step(lambda: closure(wts, free_vars))
276
+ free_vars = closure.free_vars
277
+ # except:
278
+ #
279
+ # pass
280
+
281
+ # if closure.final_loss is None or torch.isnan(closure.final_loss) or torch.any(torch.isnan(free_vars['trans'])):
282
+ # if self.verbosity > 0:
283
+ # self.logger('NaN observed in the optimization results. you might want to restart the refinment procedure.')
284
+ # breakpoint()
285
+ # return None
286
+
287
+ return closure.free_vars#, closure.nonan_mask
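A wiring sketch for the IK engine above. 'vposer_v02_05' stands in for a trained VPoser experiment directory and SourceKeyPoints for a user-supplied module mapping body parameters to 3D keypoints; the module-level imports also require the psbody-mesh and body-visualizer packages.

import torch
from mogen.datasets.human_body_prior.models.ik_engine import IK_Engine

ik = IK_Engine(vposer_expr_dir='vposer_v02_05',   # placeholder experiment directory
               data_loss=torch.nn.MSELoss(reduction='sum'),
               optimizer_args={'type': 'LBFGS', 'max_iter': 300, 'lr': 1.0},
               stepwise_weights=[{'data': 10., 'poZ_body': 0.01, 'betas': 0.5}],
               verbosity=0)
# target_kpts: (B, K, 3) tensor of desired keypoint locations
# free_vars = ik(SourceKeyPoints(...), target_kpts)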
mogen/datasets/human_body_prior/models/model_components.py ADDED
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+
24
+ from torch import nn
25
+
26
+ class View(nn.Module):
27
+ def __init__(self, *args):
28
+ super(View, self).__init__()
29
+ self.shape = args
30
+ self._name = 'reshape'
31
+
32
+ def forward(self, x):
33
+ return x.view(self.shape)
34
+
35
+ class BatchFlatten(nn.Module):
36
+ def __init__(self):
37
+ super(BatchFlatten, self).__init__()
38
+ self._name = 'batch_flatten'
39
+
40
+ def forward(self, x):
41
+ return x.view(x.shape[0], -1)
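A small sketch of how these reshape helpers slot into nn.Sequential:

import torch
from torch import nn
from mogen.datasets.human_body_prior.models.model_components import View, BatchFlatten

net = nn.Sequential(BatchFlatten(), nn.Linear(21 * 3, 8), View(-1, 2, 4))
x = torch.randn(5, 21, 3)
print(net(x).shape)  # torch.Size([5, 2, 4])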
mogen/datasets/human_body_prior/models/vposer_model.py ADDED
@@ -0,0 +1,133 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+
24
+ import numpy as np
25
+ import torch
26
+ from human_body_prior.models.model_components import BatchFlatten
27
+ from human_body_prior.tools.rotation_tools import matrot2aa
28
+ from torch import nn
29
+ from torch.nn import functional as F
30
+
31
+
32
+ class ContinousRotReprDecoder(nn.Module):
33
+ def __init__(self):
34
+ super(ContinousRotReprDecoder, self).__init__()
35
+
36
+ def forward(self, module_input):
37
+ reshaped_input = module_input.view(-1, 3, 2)
38
+
39
+ b1 = F.normalize(reshaped_input[:, :, 0], dim=1)
40
+
41
+ dot_prod = torch.sum(b1 * reshaped_input[:, :, 1], dim=1, keepdim=True)
42
+ b2 = F.normalize(reshaped_input[:, :, 1] - dot_prod * b1, dim=-1)
43
+ b3 = torch.cross(b1, b2, dim=1)
44
+
45
+ return torch.stack([b1, b2, b3], dim=-1)
46
+
47
+
48
+ class NormalDistDecoder(nn.Module):
49
+ def __init__(self, num_feat_in, latentD):
50
+ super(NormalDistDecoder, self).__init__()
51
+
52
+ self.mu = nn.Linear(num_feat_in, latentD)
53
+ self.logvar = nn.Linear(num_feat_in, latentD)
54
+
55
+ def forward(self, Xout):
56
+ return torch.distributions.normal.Normal(self.mu(Xout), F.softplus(self.logvar(Xout)))
57
+
58
+
59
+ class VPoser(nn.Module):
60
+ def __init__(self, model_ps):
61
+ super(VPoser, self).__init__()
62
+
63
+ num_neurons, self.latentD = model_ps.model_params.num_neurons, model_ps.model_params.latentD
64
+
65
+ self.num_joints = 21
66
+ n_features = self.num_joints * 3
67
+
68
+ self.encoder_net = nn.Sequential(
69
+ BatchFlatten(),
70
+ nn.BatchNorm1d(n_features),
71
+ nn.Linear(n_features, num_neurons),
72
+ nn.LeakyReLU(),
73
+ nn.BatchNorm1d(num_neurons),
74
+ nn.Dropout(0.1),
75
+ nn.Linear(num_neurons, num_neurons),
76
+ nn.Linear(num_neurons, num_neurons),
77
+ NormalDistDecoder(num_neurons, self.latentD)
78
+ )
79
+
80
+ self.decoder_net = nn.Sequential(
81
+ nn.Linear(self.latentD, num_neurons),
82
+ nn.LeakyReLU(),
83
+ nn.Dropout(0.1),
84
+ nn.Linear(num_neurons, num_neurons),
85
+ nn.LeakyReLU(),
86
+ nn.Linear(num_neurons, self.num_joints * 6),
87
+ ContinousRotReprDecoder(),
88
+ )
89
+
90
+ def encode(self, pose_body):
91
+ '''
92
+ :param Pin: Nx(numjoints*3)
93
+ :param rep_type: 'matrot'/'aa' for matrix rotations or axis-angle
94
+ :return:
95
+ '''
96
+ return self.encoder_net(pose_body)
97
+
98
+ def decode(self, Zin):
99
+ bs = Zin.shape[0]
100
+
101
+ prec = self.decoder_net(Zin)
102
+
103
+ return {
104
+ 'pose_body': matrot2aa(prec.view(-1, 3, 3)).view(bs, -1, 3),
105
+ 'pose_body_matrot': prec.view(bs, -1, 9)
106
+ }
107
+
108
+
109
+ def forward(self, pose_body):
110
+ '''
111
+ :param Pin: aa: Nx1xnum_jointsx3 / matrot: Nx1xnum_jointsx9
112
+ :param input_type: matrot / aa for matrix rotations or axis angles
113
+ :param output_type: matrot / aa
114
+ :return:
115
+ '''
116
+
117
+ q_z = self.encode(pose_body)
118
+ q_z_sample = q_z.rsample()
119
+ decode_results = self.decode(q_z_sample)
120
+ decode_results.update({'poZ_body_mean': q_z.mean, 'poZ_body_std': q_z.scale, 'q_z': q_z})
121
+ return decode_results
122
+
123
+ def sample_poses(self, num_poses, seed=None):
124
+ np.random.seed(seed)
125
+
126
+ some_weight = [a for a in self.parameters()][0]
127
+ dtype = some_weight.dtype
128
+ device = some_weight.device
129
+ self.eval()
130
+ with torch.no_grad():
131
+ Zgen = torch.tensor(np.random.normal(0., 1., size=(num_poses, self.latentD)), dtype=dtype, device=device)
132
+
133
+ return self.decode(Zgen)
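A sampling sketch for a trained VPoser checkpoint. The experiment directory name is a placeholder, and the imports assume the human_body_prior package root is on the import path (the vendored modules above import it under that name); load_model is defined in tools/model_loader.py later in this commit.

from human_body_prior.tools.model_loader import load_model
from human_body_prior.models.vposer_model import VPoser

vp, _ = load_model('vposer_v02_05', model_code=VPoser,   # placeholder path
                   remove_words_in_model_weights='vp_model.', disable_grad=True)
samples = vp.sample_poses(num_poses=4, seed=0)
print(samples['pose_body'].shape)  # (4, 21, 3) axis-angle body poses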
mogen/datasets/human_body_prior/tools/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
mogen/datasets/human_body_prior/tools/angle_continuous_repres.py ADDED
@@ -0,0 +1,80 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+ import torch.nn.functional as F
24
+ import torch
25
+ from torch import nn
26
+
27
+ import numpy as np
28
+
29
+ # numpy implementation of yi zhou's method
30
+ def norm(v):
31
+ return v/np.linalg.norm(v)
32
+
33
+ def gs(M):
34
+ a1 = M[:,0]
35
+ a2 = M[:,1]
36
+ b1 = norm(a1)
37
+ b2 = norm((a2-np.dot(b1,a2)*b1))
38
+ b3 = np.cross(b1,b2)
39
+ return np.vstack([b1,b2,b3]).T
40
+
41
+ # input sz bszx3x2
42
+ def bgs(d6s):
43
+
44
+ bsz = d6s.shape[0]
45
+ b1 = F.normalize(d6s[:,:,0], p=2, dim=1)
46
+ a2 = d6s[:,:,1]
47
+ c = torch.bmm(b1.view(bsz,1,-1),a2.view(bsz,-1,1)).view(bsz,1)*b1
48
+ b2 = F.normalize(a2-c,p=2,dim=1)
49
+ b3=torch.cross(b1,b2,dim=1)
50
+ return torch.stack([b1,b2,b3],dim=1).permute(0,2,1)
51
+
52
+
53
+ class geodesic_loss_R(nn.Module):
54
+ def __init__(self, reduction='batchmean'):
55
+ super(geodesic_loss_R, self).__init__()
56
+
57
+ self.reduction = reduction
58
+ self.eps = 1e-6
59
+
60
+ # batch geodesic loss for rotation matrices
61
+ def bgdR(self,m1,m2):
62
+ batch = m1.shape[0]
63
+ m = torch.bmm(m1, m2.transpose(1, 2)) # batch*3*3
64
+
65
+ cos = (m[:, 0, 0] + m[:, 1, 1] + m[:, 2, 2] - 1) / 2
66
+ cos = torch.min(cos, m1.new(np.ones(batch)))
67
+ cos = torch.max(cos, m1.new(np.ones(batch)) * -1)
68
+
69
+ return torch.acos(cos)
70
+
71
+ def forward(self, ypred, ytrue):
72
+ theta = self.bgdR(ypred,ytrue)
73
+ if self.reduction == 'mean':
74
+ return torch.mean(theta)
75
+ if self.reduction == 'batchmean':
77
+ return torch.mean(torch.sum(theta, dim=theta.shape[1:]))
78
+
79
+ else:
80
+ return theta
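A sanity-check sketch for the 6D rotation representation and the geodesic loss above (vendored import path assumed):

import torch
from mogen.datasets.human_body_prior.tools.angle_continuous_repres import bgs, geodesic_loss_R

d6 = torch.randn(8, 3, 2)            # unconstrained 6D rotation representations
R = bgs(d6)                          # (8, 3, 3) orthonormal rotation matrices
print(torch.allclose(torch.det(R), torch.ones(8), atol=1e-4))  # True: det(R) = 1

loss = geodesic_loss_R(reduction='mean')
print(loss(R, R))                    # ~0 for identical rotations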
mogen/datasets/human_body_prior/tools/configurations.py ADDED
@@ -0,0 +1,47 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+ from dotmap import DotMap
24
+ import os
25
+ import yaml
26
+
27
+ def load_config(default_ps_fname=None, **kwargs):
28
+ if isinstance(default_ps_fname, str):
29
+ assert os.path.exists(default_ps_fname), FileNotFoundError(default_ps_fname)
30
+ assert default_ps_fname.lower().endswith('.yaml'), NotImplementedError('Only .yaml files are accepted.')
31
+ default_ps = yaml.safe_load(open(default_ps_fname, 'r'))
32
+ else:
33
+ default_ps = {}
34
+
35
+ default_ps.update(kwargs)
36
+
37
+ return DotMap(default_ps, _dynamic=False)
38
+
39
+ def dump_config(data, fname):
40
+ '''
41
+ dump the current configuration to a yaml file
42
+ :param fname:
43
+ :return:
44
+ '''
45
+ with open(fname, 'w') as file:
46
+ yaml.dump(data.toDict(), file)
47
+ return fname
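A round-trip sketch for these helpers (requires the dotmap and pyyaml packages; 'conf.yaml' is a scratch path):

from mogen.datasets.human_body_prior.tools.configurations import load_config, dump_config

ps = load_config(None, model_params={'num_neurons': 512, 'latentD': 32})
print(ps.model_params.latentD)       # 32; DotMap gives attribute-style access
dump_config(ps, 'conf.yaml')         # writes the settings out as YAML
ps2 = load_config('conf.yaml')
print(ps2.model_params.num_neurons)  # 512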
mogen/datasets/human_body_prior/tools/model_loader.py ADDED
@@ -0,0 +1,87 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by: Nima Ghorbani <https://www.linkedin.com/in/nghorbani/>
20
+ # 2018.01.02
21
+
22
+ import os, glob
23
+ import numpy as np
24
+ from human_body_prior.tools.configurations import load_config, dump_config
25
+ import os.path as osp
26
+
27
+ def exprdir2model(expr_dir):
28
+
29
+ if not os.path.exists(expr_dir): raise ValueError('Could not find the experiment directory: %s' % expr_dir)
30
+
31
+ model_snapshots_dir = osp.join(expr_dir, 'snapshots')
32
+ available_ckpts = sorted(glob.glob(osp.join(model_snapshots_dir, '*.ckpt')), key=osp.getmtime)
33
+ assert len(available_ckpts) > 0, ValueError('No checkpoints found at {}'.format(model_snapshots_dir))
34
+ trained_weigths_fname = available_ckpts[-1]
35
+
36
+ model_ps_fname = glob.glob(osp.join('/', '/'.join(trained_weigths_fname.split('/')[:-2]), '*.yaml'))
37
+ if len(model_ps_fname) == 0:
38
+ model_ps_fname = glob.glob(osp.join('/'.join(trained_weigths_fname.split('/')[:-2]), '*.yaml'))
39
+
40
+ model_ps_fname = model_ps_fname[0]
41
+ model_ps = load_config(default_ps_fname=model_ps_fname)
42
+
43
+ model_ps.logging.best_model_fname = trained_weigths_fname
44
+
45
+ return model_ps, trained_weigths_fname
46
+
47
+
48
+ def load_model(expr_dir, model_code=None, remove_words_in_model_weights=None, load_only_ps=False, disable_grad=True, custom_ps = None):
49
+ '''
50
+
51
+ :param expr_dir:
52
+ :param model_code: an imported module
53
+ from supercap.train.supercap_smpl import SuperCap, then pass SuperCap to this function
54
+ :param if True will load the model definition used for training, and not the one in current repository
55
+ :return:
56
+ '''
57
+ import importlib
58
+ import torch
59
+
60
+ model_ps, trained_weigths_fname = exprdir2model(expr_dir)
61
+ if load_only_ps: return model_ps
62
+ if custom_ps is not None: model_ps = custom_ps
63
+ assert model_code is not None, ValueError('model_code should be provided')
64
+ model_instance = model_code(model_ps)
65
+ if disable_grad: # disable gradients explicitly; torch.no_grad() couldn't achieve what was needed here
66
+ for param in model_instance.parameters():
67
+ param.requires_grad = False
68
+ state_dict = torch.load(trained_weigths_fname)['state_dict']
69
+ if remove_words_in_model_weights is not None:
70
+ words = '{}'.format(remove_words_in_model_weights)
71
+ state_dict = {k.replace(words, '') if k.startswith(words) else k: v for k, v in state_dict.items()}
72
+
73
+ ## keys that are in the current model and not in the trained weights file
74
+ instance_model_keys = list(model_instance.state_dict().keys())
75
+ trained_model_keys = list(state_dict.keys())
76
+ wts_in_model_not_in_file = set(instance_model_keys).difference(set(trained_model_keys))
77
+ ## keys that are in the trained weights file and not in the current model
78
+ wts_in_file_not_in_model = set(trained_model_keys).difference(set(instance_model_keys))
79
+ # assert len(wts_in_model_not_in_file) == 0, ValueError('Some model weights are not present in the pretrained file. {}'.format(wts_in_model_not_in_file))
80
+
81
+ state_dict = {k:v for k, v in state_dict.items() if k in instance_model_keys}
82
+ model_instance.load_state_dict(state_dict, strict=False) # Todo fix the issues so that we can set the strict to true. The body model uses unnecessary registered buffers
83
+ model_instance.eval()
84
+
85
+ return model_instance, model_ps
86
+
87
+
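
A hypothetical call pattern for `load_model` with the `VPoser` class defined elsewhere in this commit; the experiment directory is a placeholder, and the `'vp_model.'` prefix is an assumption that matches the attribute name used by `VPoserTrainer` below:

```python
from human_body_prior.tools.model_loader import load_model
from human_body_prior.models.vposer_model import VPoser

expr_dir = 'support_data/training/training_experiments/V02_05'  # placeholder experiment dir
# Returns the model (already in eval mode, gradients disabled) plus its config DotMap.
vposer, ps = load_model(expr_dir,
                        model_code=VPoser,
                        remove_words_in_model_weights='vp_model.',  # strip the trainer attribute prefix
                        disable_grad=True)
```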
mogen/datasets/human_body_prior/tools/omni_tools.py ADDED
@@ -0,0 +1,163 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.01.02
23
+ import numpy as np
24
+ import random
25
+ import torch
26
+ import os
27
+ import sys
28
+ import os.path as osp
29
+
30
+ def copy2cpu(tensor):
31
+ if isinstance(tensor, np.ndarray): return tensor
32
+ return tensor.detach().cpu().numpy()
33
+
34
+ def create_list_chunks(list_, group_size, overlap_size, cut_smaller_batches=True):
35
+ if cut_smaller_batches:
36
+ return [list_[i:i + group_size] for i in range(0, len(list_), group_size - overlap_size) if len(list_[i:i + group_size])==group_size]
37
+ else:
38
+ return [list_[i:i + group_size] for i in range(0, len(list_), group_size - overlap_size)]
39
+
40
+
41
+ def trainable_params_count(params):
42
+ return sum([p.numel() for p in params if p.requires_grad])
43
+
44
+ def flatten_list(l):
45
+ return [item for sublist in l for item in sublist]
46
+
47
+ def get_support_data_dir(current_fname=__file__):
48
+ support_data_dir = osp.abspath(current_fname)
49
+ support_data_dir_split = support_data_dir.split('/')
50
+ support_data_dir = '/'.join(support_data_dir_split[:support_data_dir_split.index('src')])
51
+ support_data_dir = osp.join(support_data_dir, 'support_data')
52
+ assert osp.exists(support_data_dir)
53
+ return support_data_dir
54
+
55
+ def make_deterministic(seed):
56
+ random.seed(seed)
57
+ torch.manual_seed(seed)
58
+ torch.cuda.manual_seed_all(seed)
59
+ np.random.seed(seed)
60
+ os.environ['PYTHONHASHSEED'] = str(seed)
61
+ torch.backends.cudnn.deterministic = True
62
+ torch.backends.cudnn.benchmark = False
63
+
64
+ def id_generator(size=13):
65
+ import string
66
+ import random
67
+ chars = string.ascii_uppercase + string.digits
68
+ return ''.join(random.choice(chars) for _ in range(size))
69
+
70
+ def logger_sequencer(logger_list, prefix=None):
71
+ def post_text(text):
72
+ if prefix is not None: text = '{} -- '.format(prefix) + text
73
+ for logger_call in logger_list: logger_call(text)
74
+ return post_text
75
+
76
+ class log2file():
77
+ def __init__(self,logpath=None, prefix='', auto_newline = True, write2file_only=False):
78
+ if logpath is not None:
79
+ makepath(logpath, isfile=True)
80
+ self.fhandle = open(logpath,'a+')
81
+ else:
82
+ self.fhandle = None
83
+
84
+ self.prefix = prefix
85
+ self.auto_newline = auto_newline
86
+ self.write2file_only = write2file_only
87
+
88
+ def __call__(self, text):
89
+ if text is None: return
90
+ if self.prefix != '': text = '{} -- '.format(self.prefix) + text
91
+ # breakpoint()
92
+ if self.auto_newline:
93
+ if not text.endswith('\n'):
94
+ text = text + '\n'
95
+ if not self.write2file_only: sys.stderr.write(text)
96
+ if self.fhandle is not None:
97
+ self.fhandle.write(text)
98
+ self.fhandle.flush()
99
+
100
+
101
+ def makepath(*args, **kwargs):
102
+ '''
103
+ if the path does not exist make it
104
+ :param desired_path: can be path to a file or a folder name
105
+ :return:
106
+ '''
107
+ isfile = kwargs.get('isfile', False)
108
+ import os
109
+ desired_path = os.path.join(*args)
110
+ if isfile:
111
+ if not os.path.exists(os.path.dirname(desired_path)):os.makedirs(os.path.dirname(desired_path))
112
+ else:
113
+ if not os.path.exists(desired_path): os.makedirs(desired_path)
114
+ return desired_path
115
+
116
+ def matrot2axisangle(matrots):
117
+ '''
118
+ :param matrots: N*T*num_joints*9
119
+ :return: N*T*num_joints*3
120
+ '''
121
+ import cv2
122
+ N = matrots.shape[0]
123
+ T = matrots.shape[1]
124
+ n_joints = matrots.shape[2]
125
+ out_axisangle = []
126
+ for tIdx in range(T):
127
+ T_axisangle = []
128
+ for mIdx in range(N):
129
+ cur_axisangle = []
130
+ for jIdx in range(n_joints):
131
+ cur_axisangle.append(cv2.Rodrigues(matrots[mIdx, tIdx, jIdx:jIdx + 1, :].reshape(3, 3))[0].T)
132
+ T_axisangle.append(np.vstack(cur_axisangle)[np.newaxis])
133
+ out_axisangle.append(np.vstack(T_axisangle).reshape([N,1, -1,3]))
134
+ return np.concatenate(out_axisangle, axis=1)
135
+
136
+ def axisangle2matrots(axisangle):
137
+ '''
138
+ :param axisangle: N*1*num_joints*3
139
+ :return: N*num_joints*9
140
+ '''
141
+ import cv2
142
+ batch_size = axisangle.shape[0]
143
+ axisangle = axisangle.reshape([batch_size,1,-1,3])
144
+ out_matrot = []
145
+ for mIdx in range(axisangle.shape[0]):
146
+ cur_axisangle = []
147
+ for jIdx in range(axisangle.shape[2]):
148
+ a = cv2.Rodrigues(axisangle[mIdx, 0, jIdx:jIdx + 1, :].reshape(1, 3))[0].T
149
+ cur_axisangle.append(a)
150
+
151
+ out_matrot.append(np.array(cur_axisangle).reshape([batch_size,1,-1,9]))
152
+ return np.vstack(out_matrot)
153
+
154
+
155
+ def apply_mesh_tranfsormations_(meshes, transf):
156
+ '''
157
+ apply in-place transformations to meshes
158
+ :param meshes: list of trimesh meshes
159
+ :param transf:
160
+ :return:
161
+ '''
162
+ for i in range(len(meshes)):
163
+ meshes[i] = meshes[i].apply_transform(transf)
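
A brief sketch of the helpers above (`makepath`, `log2file`, `copy2cpu`); the log path is a placeholder:

```python
import torch
from human_body_prior.tools.omni_tools import makepath, log2file, copy2cpu

log = log2file(makepath('/tmp/demo_expr', 'train.log', isfile=True), prefix='[demo]')
log('starting')              # written to stderr and appended to /tmp/demo_expr/train.log

x = torch.randn(2, 3, requires_grad=True)
print(copy2cpu(x).shape)     # detached numpy array of shape (2, 3)
```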
mogen/datasets/human_body_prior/tools/rotation_tools.py ADDED
@@ -0,0 +1,151 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+ import numpy as np
24
+
25
+ from torch.nn import functional as F
26
+ from human_body_prior.tools import tgm_conversion as tgm
27
+ import torch
28
+
29
+ def local2global_pose(local_pose, kintree):
30
+ bs = local_pose.shape[0]
31
+
32
+ local_pose = local_pose.view(bs, -1, 3, 3)
33
+
34
+ global_pose = local_pose.clone()
35
+
36
+ for jId in range(len(kintree)):
37
+ parent_id = kintree[jId]
38
+ if parent_id >= 0:
39
+ global_pose[:, jId] = torch.matmul(global_pose[:, parent_id], global_pose[:, jId])
40
+
41
+ return global_pose
42
+
43
+ def em2euler(em):
44
+ '''
45
+
46
+ :param em: rotation in expo-map (3,)
47
+ :return: rotation in euler angles (3,)
48
+ '''
49
+ from transforms3d.euler import axangle2euler
50
+
51
+ theta = np.sqrt((em ** 2).sum())
52
+ axis = em / theta
53
+ return np.array(axangle2euler(axis, theta))
54
+
55
+
56
+ def euler2em(ea):
57
+ '''
58
+
59
+ :param ea: rotation in euler angles (3,)
60
+ :return: rotation in expo-map (3,)
61
+ '''
62
+ from transforms3d.euler import euler2axangle
63
+ axis, theta = euler2axangle(*ea)
64
+ return np.array(axis*theta)
65
+
66
+
67
+ def remove_zrot(pose):
68
+ noZ = em2euler(pose[:3].copy())
69
+ noZ[2] = 0
70
+ pose[:3] = euler2em(noZ).copy()
71
+ return pose
72
+
73
+ def matrot2aa(pose_matrot):
74
+ '''
75
+ :param pose_matrot: Nx3x3
76
+ :return: Nx3
77
+ '''
78
+ bs = pose_matrot.size(0)
79
+ homogen_matrot = F.pad(pose_matrot, [0,1])
80
+ pose = tgm.rotation_matrix_to_angle_axis(homogen_matrot)
81
+ return pose
82
+
83
+ def aa2matrot(pose):
84
+ '''
85
+ :param Nx3
86
+ :return: pose_matrot: Nx3x3
87
+ '''
88
+ bs = pose.size(0)
89
+ num_joints = pose.size(1)//3
90
+ pose_body_matrot = tgm.angle_axis_to_rotation_matrix(pose)[:, :3, :3].contiguous()#.view(bs, num_joints*9)
91
+ return pose_body_matrot
92
+
93
+ def noisy_zrot(rot_in):
94
+ '''
95
+
96
+ :param rot_in: np.array Nx3 rotations in axis-angle representation
97
+ :return:
98
+ will add a degree from a full circle to the zrotations
99
+ '''
100
+ is_batched = False
101
+ if rot_in.ndim == 2: is_batched = True
102
+ if not is_batched:
103
+ rot_in = rot_in[np.newaxis]
104
+
105
+ rnd_zrot = np.random.uniform(-np.pi, np.pi)
106
+ rot_out = []
107
+ for bId in range(len(rot_in)):
108
+ pose_cpu = rot_in[bId]
109
+ pose_euler = em2euler(pose_cpu)
110
+
111
+ pose_euler[2] += rnd_zrot
112
+
113
+ pose_aa = euler2em(pose_euler)
114
+ rot_out.append(pose_aa.copy())
115
+
116
+ return np.array(rot_out)
117
+
118
+ def rotate_points_xyz(mesh_v, Rxyz):
119
+ '''
120
+
121
+ :param mesh_v: Nxnum_vx3
122
+ :param Rxyz: Nx3
123
+ :return:
124
+ '''
125
+
126
+ mesh_v_rotated = []
127
+
128
+ for fId in range(mesh_v.shape[0]):
129
+ angle = np.radians(Rxyz[fId, 0])
130
+ rx = np.array([
131
+ [1., 0., 0. ],
132
+ [0., np.cos(angle), -np.sin(angle)],
133
+ [0., np.sin(angle), np.cos(angle) ]
134
+ ])
135
+
136
+ angle = np.radians(Rxyz[fId, 1])
137
+ ry = np.array([
138
+ [np.cos(angle), 0., np.sin(angle)],
139
+ [0., 1., 0. ],
140
+ [-np.sin(angle), 0., np.cos(angle)]
141
+ ])
142
+
143
+ angle = np.radians(Rxyz[fId, 2])
144
+ rz = np.array([
145
+ [np.cos(angle), -np.sin(angle), 0. ],
146
+ [np.sin(angle), np.cos(angle), 0. ],
147
+ [0., 0., 1. ]
148
+ ])
149
+ mesh_v_rotated.append(rz.dot(ry.dot(rx.dot(mesh_v[fId].T))).T)
150
+
151
+ return np.array(mesh_v_rotated)
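
A round-trip sanity check for the axis-angle/rotation-matrix helpers above (not part of the commit):

```python
import torch
from human_body_prior.tools.rotation_tools import aa2matrot, matrot2aa

aa = torch.tensor([[0.3, -0.2, 0.1]])   # N x 3 axis-angle
R = aa2matrot(aa)                       # N x 3 x 3
aa_back = matrot2aa(R)                  # N x 3, should match the input
print(torch.allclose(aa, aa_back, atol=1e-4))  # True
```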
mogen/datasets/human_body_prior/tools/tgm_conversion.py ADDED
@@ -0,0 +1,527 @@
1
+ '''
2
+ This code is ripped from a version of torchgeometry, now called Kornia. Since Kornia has a
3
+ known bug: https://github.com/kornia/kornia/issues/317#issuecomment-751305910
4
+ in converting rotation representations, we use this code until the original bug in Kornia is addressed.
5
+ '''
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ __all__ = [
11
+ # functional api
12
+ "pi",
13
+ "rad2deg",
14
+ "deg2rad",
15
+ "convert_points_from_homogeneous",
16
+ "convert_points_to_homogeneous",
17
+ "angle_axis_to_rotation_matrix",
18
+ "rotation_matrix_to_angle_axis",
19
+ "rotation_matrix_to_quaternion",
20
+ "quaternion_to_angle_axis",
21
+ "angle_axis_to_quaternion",
22
+ "rtvec_to_pose",
23
+ # layer api
24
+ "RadToDeg",
25
+ "DegToRad",
26
+ "ConvertPointsFromHomogeneous",
27
+ "ConvertPointsToHomogeneous",
28
+ ]
29
+
30
+
31
+ """Constant with number pi
32
+ """
33
+ pi = torch.Tensor([3.14159265358979323846])
34
+
35
+
36
+ def rad2deg(tensor):
37
+ r"""Function that converts angles from radians to degrees.
38
+
39
+ See :class:`~torchgeometry.RadToDeg` for details.
40
+
41
+ Args:
42
+ tensor (Tensor): Tensor of arbitrary shape.
43
+
44
+ Returns:
45
+ Tensor: Tensor with same shape as input.
46
+
47
+ Example:
48
+ >>> input = tgm.pi * torch.rand(1, 3, 3)
49
+ >>> output = tgm.rad2deg(input)
50
+ """
51
+ if not torch.is_tensor(tensor):
52
+ raise TypeError("Input type is not a torch.Tensor. Got {}"
53
+ .format(type(tensor)))
54
+
55
+ return 180. * tensor / pi.to(tensor.device).type(tensor.dtype)
56
+
57
+
58
+ def deg2rad(tensor):
59
+ r"""Function that converts angles from degrees to radians.
60
+
61
+ See :class:`~torchgeometry.DegToRad` for details.
62
+
63
+ Args:
64
+ tensor (Tensor): Tensor of arbitrary shape.
65
+
66
+ Returns:
67
+ Tensor: Tensor with same shape as input.
68
+
69
+ Examples::
70
+
71
+ >>> input = 360. * torch.rand(1, 3, 3)
72
+ >>> output = tgm.deg2rad(input)
73
+ """
74
+ if not torch.is_tensor(tensor):
75
+ raise TypeError("Input type is not a torch.Tensor. Got {}"
76
+ .format(type(tensor)))
77
+
78
+ return tensor * pi.to(tensor.device).type(tensor.dtype) / 180.
79
+
80
+
81
+ def convert_points_from_homogeneous(points):
82
+ r"""Function that converts points from homogeneous to Euclidean space.
83
+
84
+ See :class:`~torchgeometry.ConvertPointsFromHomogeneous` for details.
85
+
86
+ Examples::
87
+
88
+ >>> input = torch.rand(2, 4, 3) # BxNx3
89
+ >>> output = tgm.convert_points_from_homogeneous(input) # BxNx2
90
+ """
91
+ if not torch.is_tensor(points):
92
+ raise TypeError("Input type is not a torch.Tensor. Got {}".format(
93
+ type(points)))
94
+ if len(points.shape) < 2:
95
+ raise ValueError("Input must be at least a 2D tensor. Got {}".format(
96
+ points.shape))
97
+
98
+ return points[..., :-1] / points[..., -1:]
99
+
100
+
101
+ def convert_points_to_homogeneous(points):
102
+ r"""Function that converts points from Euclidean to homogeneous space.
103
+
104
+ See :class:`~torchgeometry.ConvertPointsToHomogeneous` for details.
105
+
106
+ Examples::
107
+
108
+ >>> input = torch.rand(2, 4, 3) # BxNx3
109
+ >>> output = tgm.convert_points_to_homogeneous(input) # BxNx4
110
+ """
111
+ if not torch.is_tensor(points):
112
+ raise TypeError("Input type is not a torch.Tensor. Got {}".format(
113
+ type(points)))
114
+ if len(points.shape) < 2:
115
+ raise ValueError("Input must be at least a 2D tensor. Got {}".format(
116
+ points.shape))
117
+
118
+ return nn.functional.pad(points, (0, 1), "constant", 1.0)
119
+
120
+
121
+ def angle_axis_to_rotation_matrix(angle_axis):
122
+ """Convert 3d vector of axis-angle rotation to 4x4 rotation matrix
123
+
124
+ Args:
125
+ angle_axis (Tensor): tensor of 3d vector of axis-angle rotations.
126
+
127
+ Returns:
128
+ Tensor: tensor of 4x4 rotation matrices.
129
+
130
+ Shape:
131
+ - Input: :math:`(N, 3)`
132
+ - Output: :math:`(N, 4, 4)`
133
+
134
+ Example:
135
+ >>> input = torch.rand(1, 3) # Nx3
136
+ >>> output = tgm.angle_axis_to_rotation_matrix(input) # Nx4x4
137
+ """
138
+ def _compute_rotation_matrix(angle_axis, theta2, eps=1e-6):
139
+ # We want to be careful to only evaluate the square root if the
140
+ # norm of the angle_axis vector is greater than zero. Otherwise
141
+ # we get a division by zero.
142
+ k_one = 1.0
143
+ theta = torch.sqrt(theta2)
144
+ wxyz = angle_axis / (theta + eps)
145
+ wx, wy, wz = torch.chunk(wxyz, 3, dim=1)
146
+ cos_theta = torch.cos(theta)
147
+ sin_theta = torch.sin(theta)
148
+
149
+ r00 = cos_theta + wx * wx * (k_one - cos_theta)
150
+ r10 = wz * sin_theta + wx * wy * (k_one - cos_theta)
151
+ r20 = -wy * sin_theta + wx * wz * (k_one - cos_theta)
152
+ r01 = wx * wy * (k_one - cos_theta) - wz * sin_theta
153
+ r11 = cos_theta + wy * wy * (k_one - cos_theta)
154
+ r21 = wx * sin_theta + wy * wz * (k_one - cos_theta)
155
+ r02 = wy * sin_theta + wx * wz * (k_one - cos_theta)
156
+ r12 = -wx * sin_theta + wy * wz * (k_one - cos_theta)
157
+ r22 = cos_theta + wz * wz * (k_one - cos_theta)
158
+ rotation_matrix = torch.cat(
159
+ [r00, r01, r02, r10, r11, r12, r20, r21, r22], dim=1)
160
+ return rotation_matrix.view(-1, 3, 3)
161
+
162
+ def _compute_rotation_matrix_taylor(angle_axis):
163
+ rx, ry, rz = torch.chunk(angle_axis, 3, dim=1)
164
+ k_one = torch.ones_like(rx)
165
+ rotation_matrix = torch.cat(
166
+ [k_one, -rz, ry, rz, k_one, -rx, -ry, rx, k_one], dim=1)
167
+ return rotation_matrix.view(-1, 3, 3)
168
+
169
+ # stolen from ceres/rotation.h
170
+
171
+ _angle_axis = torch.unsqueeze(angle_axis, dim=1)
172
+ theta2 = torch.matmul(_angle_axis, _angle_axis.transpose(1, 2))
173
+ theta2 = torch.squeeze(theta2, dim=1)
174
+
175
+ # compute rotation matrices
176
+ rotation_matrix_normal = _compute_rotation_matrix(angle_axis, theta2)
177
+ rotation_matrix_taylor = _compute_rotation_matrix_taylor(angle_axis)
178
+
179
+ # create mask to handle both cases
180
+ eps = 1e-6
181
+ mask = (theta2 > eps).view(-1, 1, 1).to(theta2.device)
182
+ mask_pos = (mask).type_as(theta2)
183
+ mask_neg = (mask == False).type_as(theta2) # noqa
184
+
185
+ # create output pose matrix
186
+ batch_size = angle_axis.shape[0]
187
+ rotation_matrix = torch.eye(4).to(angle_axis.device).type_as(angle_axis)
188
+ rotation_matrix = rotation_matrix.view(1, 4, 4).repeat(batch_size, 1, 1)
189
+ # fill output matrix with masked values
190
+ rotation_matrix[..., :3, :3] = \
191
+ mask_pos * rotation_matrix_normal + mask_neg * rotation_matrix_taylor
192
+ return rotation_matrix # Nx4x4
193
+
194
+
195
+ def rtvec_to_pose(rtvec):
196
+ """
197
+ Convert axis-angle rotation and translation vector to 4x4 pose matrix
198
+
199
+ Args:
200
+ rtvec (Tensor): Rodrigues vector transformations
201
+
202
+ Returns:
203
+ Tensor: transformation matrices
204
+
205
+ Shape:
206
+ - Input: :math:`(N, 6)`
207
+ - Output: :math:`(N, 4, 4)`
208
+
209
+ Example:
210
+ >>> input = torch.rand(3, 6) # Nx6
211
+ >>> output = tgm.rtvec_to_pose(input) # Nx4x4
212
+ """
213
+ assert rtvec.shape[-1] == 6, 'rtvec=[rx, ry, rz, tx, ty, tz]'
214
+ pose = angle_axis_to_rotation_matrix(rtvec[..., :3])
215
+ pose[..., :3, 3] = rtvec[..., 3:]
216
+ return pose
217
+
218
+
219
+ def rotation_matrix_to_angle_axis(rotation_matrix):
220
+ """Convert 3x4 rotation matrix to Rodrigues vector
221
+
222
+ Args:
223
+ rotation_matrix (Tensor): rotation matrix.
224
+
225
+ Returns:
226
+ Tensor: Rodrigues vector transformation.
227
+
228
+ Shape:
229
+ - Input: :math:`(N, 3, 4)`
230
+ - Output: :math:`(N, 3)`
231
+
232
+ Example:
233
+ >>> input = torch.rand(2, 3, 4) # Nx4x4
234
+ >>> output = tgm.rotation_matrix_to_angle_axis(input) # Nx3
235
+ """
236
+ # todo add check that matrix is a valid rotation matrix
237
+ quaternion = rotation_matrix_to_quaternion(rotation_matrix)
238
+ return quaternion_to_angle_axis(quaternion)
239
+
240
+
241
+ def rotation_matrix_to_quaternion(rotation_matrix, eps=1e-6):
242
+ """Convert 3x4 rotation matrix to 4d quaternion vector
243
+
244
+ This algorithm is based on algorithm described in
245
+ https://github.com/KieranWynn/pyquaternion/blob/master/pyquaternion/quaternion.py#L201
246
+
247
+ Args:
248
+ rotation_matrix (Tensor): the rotation matrix to convert.
249
+
250
+ Return:
251
+ Tensor: the rotation in quaternion
252
+
253
+ Shape:
254
+ - Input: :math:`(N, 3, 4)`
255
+ - Output: :math:`(N, 4)`
256
+
257
+ Example:
258
+ >>> input = torch.rand(4, 3, 4) # Nx3x4
259
+ >>> output = tgm.rotation_matrix_to_quaternion(input) # Nx4
260
+ """
261
+ if not torch.is_tensor(rotation_matrix):
262
+ raise TypeError("Input type is not a torch.Tensor. Got {}".format(
263
+ type(rotation_matrix)))
264
+
265
+ if len(rotation_matrix.shape) > 3:
266
+ raise ValueError(
267
+ "Input size must be a three dimensional tensor. Got {}".format(
268
+ rotation_matrix.shape))
269
+ if not rotation_matrix.shape[-2:] == (3, 4):
270
+ raise ValueError(
271
+ "Input size must be a N x 3 x 4 tensor. Got {}".format(
272
+ rotation_matrix.shape))
273
+
274
+ rmat_t = torch.transpose(rotation_matrix, 1, 2)
275
+
276
+ mask_d2 = rmat_t[:, 2, 2] < eps
277
+
278
+ mask_d0_d1 = rmat_t[:, 0, 0] > rmat_t[:, 1, 1]
279
+ mask_d0_nd1 = rmat_t[:, 0, 0] < -rmat_t[:, 1, 1]
280
+
281
+ t0 = 1 + rmat_t[:, 0, 0] - rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
282
+ q0 = torch.stack([rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
283
+ t0, rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
284
+ rmat_t[:, 2, 0] + rmat_t[:, 0, 2]], -1)
285
+ t0_rep = t0.repeat(4, 1).t()
286
+
287
+ t1 = 1 - rmat_t[:, 0, 0] + rmat_t[:, 1, 1] - rmat_t[:, 2, 2]
288
+ q1 = torch.stack([rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
289
+ rmat_t[:, 0, 1] + rmat_t[:, 1, 0],
290
+ t1, rmat_t[:, 1, 2] + rmat_t[:, 2, 1]], -1)
291
+ t1_rep = t1.repeat(4, 1).t()
292
+
293
+ t2 = 1 - rmat_t[:, 0, 0] - rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
294
+ q2 = torch.stack([rmat_t[:, 0, 1] - rmat_t[:, 1, 0],
295
+ rmat_t[:, 2, 0] + rmat_t[:, 0, 2],
296
+ rmat_t[:, 1, 2] + rmat_t[:, 2, 1], t2], -1)
297
+ t2_rep = t2.repeat(4, 1).t()
298
+
299
+ t3 = 1 + rmat_t[:, 0, 0] + rmat_t[:, 1, 1] + rmat_t[:, 2, 2]
300
+ q3 = torch.stack([t3, rmat_t[:, 1, 2] - rmat_t[:, 2, 1],
301
+ rmat_t[:, 2, 0] - rmat_t[:, 0, 2],
302
+ rmat_t[:, 0, 1] - rmat_t[:, 1, 0]], -1)
303
+ t3_rep = t3.repeat(4, 1).t()
304
+
305
+ mask_c0 = mask_d2 * mask_d0_d1
306
+ mask_c1 = mask_d2 * torch.logical_not(mask_d0_d1)
307
+ mask_c2 = torch.logical_not(mask_d2) * mask_d0_nd1
308
+ mask_c3 = torch.logical_not(mask_d2) * torch.logical_not(mask_d0_nd1)
309
+ mask_c0 = mask_c0.view(-1, 1).type_as(q0)
310
+ mask_c1 = mask_c1.view(-1, 1).type_as(q1)
311
+ mask_c2 = mask_c2.view(-1, 1).type_as(q2)
312
+ mask_c3 = mask_c3.view(-1, 1).type_as(q3)
313
+
314
+ q = q0 * mask_c0 + q1 * mask_c1 + q2 * mask_c2 + q3 * mask_c3
315
+ q /= torch.sqrt(t0_rep * mask_c0 + t1_rep * mask_c1 + # noqa
316
+ t2_rep * mask_c2 + t3_rep * mask_c3) # noqa
317
+ q *= 0.5
318
+ return q
319
+
320
+
321
+ def quaternion_to_angle_axis(quaternion) -> torch.Tensor:
322
+ """Convert quaternion vector to angle axis of rotation.
323
+
324
+ Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h
325
+
326
+ Args:
327
+ quaternion (torch.Tensor): tensor with quaternions.
328
+
329
+ Return:
330
+ torch.Tensor: tensor with angle axis of rotation.
331
+
332
+ Shape:
333
+ - Input: :math:`(*, 4)` where `*` means, any number of dimensions
334
+ - Output: :math:`(*, 3)`
335
+
336
+ Example:
337
+ >>> quaternion = torch.rand(2, 4) # Nx4
338
+ >>> angle_axis = tgm.quaternion_to_angle_axis(quaternion) # Nx3
339
+ """
340
+ if not torch.is_tensor(quaternion):
341
+ raise TypeError("Input type is not a torch.Tensor. Got {}".format(
342
+ type(quaternion)))
343
+
344
+ if not quaternion.shape[-1] == 4:
345
+ raise ValueError("Input must be a tensor of shape Nx4 or 4. Got {}"
346
+ .format(quaternion.shape))
347
+ # unpack input and compute conversion
348
+ q1 = quaternion[..., 1]
349
+ q2 = quaternion[..., 2]
350
+ q3 = quaternion[..., 3]
351
+ sin_squared_theta = q1 * q1 + q2 * q2 + q3 * q3
352
+
353
+ sin_theta = torch.sqrt(sin_squared_theta)
354
+ cos_theta = quaternion[..., 0]
355
+ two_theta = 2.0 * torch.where(
356
+ cos_theta < 0.0,
357
+ torch.atan2(-sin_theta, -cos_theta),
358
+ torch.atan2(sin_theta, cos_theta))
359
+
360
+ k_pos = two_theta / sin_theta
361
+ k_neg = 2.0 * torch.ones_like(sin_theta)
362
+ k = torch.where(sin_squared_theta > 0.0, k_pos, k_neg)
363
+
364
+ angle_axis = torch.zeros_like(quaternion)[..., :3]
365
+ angle_axis[..., 0] += q1 * k
366
+ angle_axis[..., 1] += q2 * k
367
+ angle_axis[..., 2] += q3 * k
368
+ return angle_axis
369
+
370
+ # based on:
371
+ # https://github.com/facebookresearch/QuaterNet/blob/master/common/quaternion.py#L138
372
+
373
+
374
+ def angle_axis_to_quaternion(angle_axis) -> torch.Tensor:
375
+ """Convert an angle axis to a quaternion.
376
+
377
+ Adapted from ceres C++ library: ceres-solver/include/ceres/rotation.h
378
+
379
+ Args:
380
+ angle_axis (torch.Tensor): tensor with angle axis.
381
+
382
+ Return:
383
+ torch.Tensor: tensor with quaternion.
384
+
385
+ Shape:
386
+ - Input: :math:`(*, 3)` where `*` means, any number of dimensions
387
+ - Output: :math:`(*, 4)`
388
+
389
+ Example:
390
+ >>> angle_axis = torch.rand(2, 3) # Nx3
391
+ >>> quaternion = tgm.angle_axis_to_quaternion(angle_axis) # Nx4
392
+ """
393
+ if not torch.is_tensor(angle_axis):
394
+ raise TypeError("Input type is not a torch.Tensor. Got {}".format(
395
+ type(angle_axis)))
396
+
397
+ if not angle_axis.shape[-1] == 3:
398
+ raise ValueError("Input must be a tensor of shape Nx3 or 3. Got {}"
399
+ .format(angle_axis.shape))
400
+ # unpack input and compute conversion
401
+ a0 = angle_axis[..., 0:1]
402
+ a1 = angle_axis[..., 1:2]
403
+ a2 = angle_axis[..., 2:3]
404
+ theta_squared = a0 * a0 + a1 * a1 + a2 * a2
405
+
406
+ theta = torch.sqrt(theta_squared)
407
+ half_theta = theta * 0.5
408
+
409
+ mask = theta_squared > 0.0
410
+ ones = torch.ones_like(half_theta)
411
+
412
+ k_neg = 0.5 * ones
413
+ k_pos = torch.sin(half_theta) / theta
414
+ k = torch.where(mask, k_pos, k_neg)
415
+ w = torch.where(mask, torch.cos(half_theta), ones)
416
+
417
+ quaternion = torch.zeros_like(angle_axis)
418
+ quaternion[..., 0:1] += a0 * k
419
+ quaternion[..., 1:2] += a1 * k
420
+ quaternion[..., 2:3] += a2 * k
421
+ return torch.cat([w, quaternion], dim=-1)
422
+
423
+ # TODO: add below functionalities
424
+ # - pose_to_rtvec
425
+
426
+
427
+ # layer api
428
+
429
+
430
+ class RadToDeg(nn.Module):
431
+ r"""Creates an object that converts angles from radians to degrees.
432
+
433
+ Args:
434
+ tensor (Tensor): Tensor of arbitrary shape.
435
+
436
+ Returns:
437
+ Tensor: Tensor with same shape as input.
438
+
439
+ Examples::
440
+
441
+ >>> input = tgm.pi * torch.rand(1, 3, 3)
442
+ >>> output = tgm.RadToDeg()(input)
443
+ """
444
+
445
+ def __init__(self):
446
+ super(RadToDeg, self).__init__()
447
+
448
+ def forward(self, input):
449
+ return rad2deg(input)
450
+
451
+
452
+ class DegToRad(nn.Module):
453
+ r"""Function that converts angles from degrees to radians.
454
+
455
+ Args:
456
+ tensor (Tensor): Tensor of arbitrary shape.
457
+
458
+ Returns:
459
+ Tensor: Tensor with same shape as input.
460
+
461
+ Examples::
462
+
463
+ >>> input = 360. * torch.rand(1, 3, 3)
464
+ >>> output = tgm.DegToRad()(input)
465
+ """
466
+
467
+ def __init__(self):
468
+ super(DegToRad, self).__init__()
469
+
470
+ def forward(self, input):
471
+ return deg2rad(input)
472
+
473
+
474
+ class ConvertPointsFromHomogeneous(nn.Module):
475
+ r"""Creates a transformation that converts points from homogeneous to
476
+ Euclidean space.
477
+
478
+ Args:
479
+ points (Tensor): tensor of N-dimensional points.
480
+
481
+ Returns:
482
+ Tensor: tensor of N-1-dimensional points.
483
+
484
+ Shape:
485
+ - Input: :math:`(B, D, N)` or :math:`(D, N)`
486
+ - Output: :math:`(B, D, N + 1)` or :math:`(D, N + 1)`
487
+
488
+ Examples::
489
+
490
+ >>> input = torch.rand(2, 4, 3) # BxNx3
491
+ >>> transform = tgm.ConvertPointsFromHomogeneous()
492
+ >>> output = transform(input) # BxNx2
493
+ """
494
+
495
+ def __init__(self):
496
+ super(ConvertPointsFromHomogeneous, self).__init__()
497
+
498
+ def forward(self, input):
499
+ return convert_points_from_homogeneous(input)
500
+
501
+
502
+ class ConvertPointsToHomogeneous(nn.Module):
503
+ r"""Creates a transformation to convert points from Euclidean to
504
+ homogeneous space.
505
+
506
+ Args:
507
+ points (Tensor): tensor of N-dimensional points.
508
+
509
+ Returns:
510
+ Tensor: tensor of N+1-dimensional points.
511
+
512
+ Shape:
513
+ - Input: :math:`(B, D, N)` or :math:`(D, N)`
514
+ - Output: :math:`(B, D, N + 1)` or :math:`(D, N + 1)`
515
+
516
+ Examples::
517
+
518
+ >>> input = torch.rand(2, 4, 3) # BxNx3
519
+ >>> transform = tgm.ConvertPointsToHomogeneous()
520
+ >>> output = transform(input) # BxNx4
521
+ """
522
+
523
+ def __init__(self):
524
+ super(ConvertPointsToHomogeneous, self).__init__()
525
+
526
+ def forward(self, input):
527
+ return convert_points_to_homogeneous(input)
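
A quick check of the conversion chain in this vendored module (shapes follow the docstrings above; not part of the commit):

```python
import torch
from human_body_prior.tools import tgm_conversion as tgm

aa = torch.tensor([[0.0, 0.0, 1.5708]])                      # N x 3, ~90 deg about z
R4 = tgm.angle_axis_to_rotation_matrix(aa)                   # N x 4 x 4
aa_back = tgm.rotation_matrix_to_angle_axis(R4[:, :3, :4])   # expects N x 3 x 4
print(aa_back)                                               # close to the input axis-angle
```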
mogen/datasets/human_body_prior/train/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # Train VPoser from Scratch
2
+ To train your own VPoser with a new configuration, duplicate the provided **V02_05** folder, set a new experiment ID,
3
+ and change the settings as you desire.
4
+ First, download the
5
+ [AMASS](https://amass.is.tue.mpg.de/) dataset, then follow the [data preparation tutorial](../data/README.md)
6
+ to prepare the data for training.
7
+ The following training code snippet can be found in the [example training experiment](https://github.com/nghorbani/human_body_prior/blob/master/src/human_body_prior/train/V02_05/V02_05.py):
8
+
9
+ ```python
10
+ import glob
11
+ import os.path as osp
12
+
13
+ from human_body_prior.tools.configurations import load_config
14
+ from human_body_prior.train.vposer_trainer import train_vposer_once
15
+
16
+ def main():
17
+ expr_id = 'V02_05'
18
+
19
+ default_ps_fname = glob.glob(osp.join(osp.dirname(__file__), '*.yaml'))[0]
20
+
21
+ vp_ps = load_config(default_ps_fname)
22
+
23
+ vp_ps.train_parms.batch_size = 128
24
+
25
+ vp_ps.general.expr_id = expr_id
26
+
27
+ total_jobs = []
28
+ total_jobs.append(vp_ps.toDict().copy())
29
+
30
+ print('#training_jobs to be done: {}'.format(len(total_jobs)))
31
+ if len(total_jobs) == 0:
32
+ print('No jobs to be done')
33
+ return
34
+
35
+ for job in total_jobs:
36
+ train_vposer_once(job)
37
+ ```
38
+ The above code uses yaml configuration files to handle experiment settings.
39
+ It loads the default settings from *<expr_id>.yaml* and overrides them with your new arguments.
40
+
41
+ The training code will dump a log file along with a TensorBoard-readable events file.
mogen/datasets/human_body_prior/train/V02_05/V02_05.py ADDED
@@ -0,0 +1,54 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+
24
+ import glob
25
+ import os.path as osp
26
+
27
+ from human_body_prior.tools.configurations import load_config
28
+ from human_body_prior.train.vposer_trainer import train_vposer_once
29
+
30
+ def main():
31
+ expr_id = 'V02_05'
32
+
33
+ default_ps_fname = glob.glob(osp.join(osp.dirname(__file__), '*.yaml'))[0]
34
+
35
+ vp_ps = load_config(default_ps_fname)
36
+
37
+ vp_ps.train_parms.batch_size = 128
38
+
39
+ vp_ps.general.expr_id = expr_id
40
+
41
+ total_jobs = []
42
+ total_jobs.append(vp_ps.toDict().copy())
43
+
44
+ print('#training_jobs to be done: {}'.format(len(total_jobs)))
45
+ if len(total_jobs) == 0:
46
+ print('No jobs to be done')
47
+ return
48
+
49
+ for job in total_jobs:
50
+ train_vposer_once(job)
51
+
52
+
53
+ if __name__ == '__main__':
54
+ main()
mogen/datasets/human_body_prior/train/V02_05/V02_05.yaml ADDED
@@ -0,0 +1,84 @@
1
+ ---
2
+ body_model:
3
+ gender: neutral
4
+ bm_fname: ../../../../support_data/dowloads/models/smplx/neutral/model.npz
5
+
6
+ general:
7
+ verbosity: 0
8
+ expr_id:
9
+ dataset_id: V02_03 #SMPLx neutral
10
+ rnd_seed: 100
11
+ work_basedir: ../../../../support_data/training/training_experiments
12
+ dataset_basedir: ../../../../support_data/training/data
13
+
14
+ logging:
15
+ expr_msg:
16
+ num_bodies_to_display: 25
17
+ work_dir:
18
+ dataset_dir:
19
+ render_during_training: False
20
+ best_model_fname:
21
+
22
+ train_parms:
23
+ batch_size:
24
+ num_epochs: 100
25
+ restore_optimizer: False
26
+ gen_optimizer:
27
+ type: Adam
28
+ args:
29
+ lr: 0.001
30
+ weight_decay: 0.00001
31
+ lr_scheduler:
32
+ type: ReduceLROnPlateau
33
+ args:
34
+ # metrics: val_loss
35
+ verbose: true
36
+ patience: 5
37
+ early_stopping:
38
+ monitor: val_loss
39
+ min_delta: 0.0
40
+ patience: 10
41
+ verbose: True
42
+ mode: min
43
+ keep_extra_loss_terms_until_epoch: 15
44
+ loss_weights:
45
+ loss_kl_wt: 0.005
46
+ loss_rec_wt: 4
47
+ loss_matrot_wt: 2
48
+ loss_jtr_wt: 2
49
+
50
+
51
+ data_parms:
52
+ num_workers: 5 # Used for dataloaders
53
+ amass_dir: support_data/dowloads/amass/smplx_neutral
54
+ num_timeseq_frames: 1
55
+ amass_splits:
56
+ vald:
57
+ # - HumanEva
58
+ # - MPI_HDM05
59
+ # - SFU
60
+ # - MPI_mosh
61
+ - BMLrub_vald
62
+ train:
63
+ - CMU
64
+ - BMLrub_train
65
+ # - MPI_Limits
66
+ # - TotalCapture
67
+ # - Eyes_Japan_Dataset
68
+ # - KIT
69
+ # - BMLrub
70
+ # - EKUT
71
+ # - TCD_handMocap
72
+ # - ACCAD
73
+ # - BMLmovi
74
+ test:
75
+ - BMLrub_test
76
+ # - Transitions_mocap
77
+ # - SSM_synced
78
+ # - DFaust_67
79
+
80
+
81
+ model_params:
82
+ num_neurons : 512
83
+ latentD : 32
84
+
mogen/datasets/human_body_prior/train/V02_05/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
mogen/datasets/human_body_prior/train/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2018.01.02
mogen/datasets/human_body_prior/train/vposer_trainer.py ADDED
@@ -0,0 +1,337 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+
24
+ # from pytorch_lightning import Trainer
25
+
26
+ import glob
27
+ import os
28
+ import os.path as osp
29
+ from datetime import datetime as dt
30
+ from pytorch_lightning.plugins import DDPPlugin
31
+
32
+ import numpy as np
33
+ import pytorch_lightning as pl
34
+ import torch
35
+ from human_body_prior.body_model.body_model import BodyModel
36
+ from human_body_prior.data.dataloader import VPoserDS
37
+ from human_body_prior.data.prepare_data import dataset_exists
38
+ from human_body_prior.data.prepare_data import prepare_vposer_datasets
39
+ from human_body_prior.models.vposer_model import VPoser
40
+ from human_body_prior.tools.angle_continuous_repres import geodesic_loss_R
41
+ from human_body_prior.tools.configurations import load_config, dump_config
42
+ from human_body_prior.tools.omni_tools import copy2cpu as c2c
43
+ from human_body_prior.tools.omni_tools import get_support_data_dir
44
+ from human_body_prior.tools.omni_tools import log2file
45
+ from human_body_prior.tools.omni_tools import make_deterministic
46
+ from human_body_prior.tools.omni_tools import makepath
47
+ from human_body_prior.tools.rotation_tools import aa2matrot
48
+ from human_body_prior.visualizations.training_visualization import vposer_trainer_renderer
49
+ from pytorch_lightning.callbacks import LearningRateMonitor
50
+ from pytorch_lightning.callbacks.early_stopping import EarlyStopping
51
+
52
+ from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
53
+ from pytorch_lightning.core import LightningModule
54
+ from pytorch_lightning.loggers import TensorBoardLogger
55
+ from pytorch_lightning.utilities import rank_zero_only
56
+ from torch import optim as optim_module
57
+ from torch.optim import lr_scheduler as lr_sched_module
58
+ from torch.utils.data import DataLoader
59
+
60
+
61
+ class VPoserTrainer(LightningModule):
62
+ """
63
+
64
+ It includes all data loading and train/val logic, and it is used for both training and testing models.
65
+ """
66
+
67
+ def __init__(self, _config):
68
+ super(VPoserTrainer, self).__init__()
69
+
70
+ _support_data_dir = get_support_data_dir()
71
+
72
+ vp_ps = load_config(**_config)
73
+
74
+ make_deterministic(vp_ps.general.rnd_seed)
75
+
76
+ self.expr_id = vp_ps.general.expr_id
77
+ self.dataset_id = vp_ps.general.dataset_id
78
+
79
+ self.work_dir = vp_ps.logging.work_dir = makepath(vp_ps.general.work_basedir, self.expr_id)
80
+ self.dataset_dir = vp_ps.logging.dataset_dir = osp.join(vp_ps.general.dataset_basedir, vp_ps.general.dataset_id)
81
+
82
+ self._log_prefix = '[{}]'.format(self.expr_id)
83
+ self.text_logger = log2file(prefix=self._log_prefix)
84
+
85
+ self.seq_len = vp_ps.data_parms.num_timeseq_frames
86
+
87
+ self.vp_model = VPoser(vp_ps)
88
+
89
+ with torch.no_grad():
90
+
91
+ self.bm_train = BodyModel(vp_ps.body_model.bm_fname)
92
+
93
+ if vp_ps.logging.render_during_training:
94
+ self.renderer = vposer_trainer_renderer(self.bm_train, vp_ps.logging.num_bodies_to_display)
95
+ else:
96
+ self.renderer = None
97
+
98
+ self.example_input_array = {'pose_body':torch.ones(vp_ps.train_parms.batch_size, 63),}
99
+ self.vp_ps = vp_ps
100
+
101
+ def forward(self, pose_body):
102
+
103
+ return self.vp_model(pose_body)
104
+
105
+ def _get_data(self, split_name):
106
+
107
+ assert split_name in ('train', 'vald', 'test')
108
+
109
+ split_name = split_name.replace('vald', 'vald')
110
+
111
+ assert dataset_exists(self.dataset_dir), FileNotFoundError('Dataset does not exist dataset_dir = {}'.format(self.dataset_dir))
112
+ dataset = VPoserDS(osp.join(self.dataset_dir, split_name), data_fields = ['pose_body'])
113
+
114
+ assert len(dataset) != 0, ValueError('Dataset has nothing in it!')
115
+
116
+ return DataLoader(dataset,
117
+ batch_size=self.vp_ps.train_parms.batch_size,
118
+ shuffle=True if split_name == 'train' else False,
119
+ num_workers=self.vp_ps.data_parms.num_workers,
120
+ pin_memory=True)
121
+
122
+ @rank_zero_only
123
+ def on_train_start(self):
124
+ if self.global_rank != 0: return
125
+ self.train_starttime = dt.now().replace(microsecond=0)
126
+
127
+ ######## make a backup of vposer
128
+ git_repo_dir = os.path.abspath(__file__).split('/')
129
+ git_repo_dir = '/'.join(git_repo_dir[:git_repo_dir.index('human_body_prior') + 1])
130
+ starttime = dt.strftime(self.train_starttime, '%Y_%m_%d_%H_%M_%S')
131
+ archive_path = makepath(self.work_dir, 'code', 'vposer_{}.tar.gz'.format(starttime), isfile=True)
132
+ cmd = 'cd %s && git ls-files -z | xargs -0 tar -czf %s' % (git_repo_dir, archive_path)
133
+ os.system(cmd)
134
+ ########
135
+ self.text_logger('Created a git archive backup at {}'.format(archive_path))
136
+ dump_config(self.vp_ps, osp.join(self.work_dir, '{}.yaml'.format(self.expr_id)))
137
+
138
+ def train_dataloader(self):
139
+ return self._get_data('train')
140
+
141
+ def val_dataloader(self):
142
+ return self._get_data('vald')
143
+
144
+ def configure_optimizers(self):
145
+ params_count = lambda params: sum(p.numel() for p in params if p.requires_grad)
146
+
147
+ gen_params = [a[1] for a in self.vp_model.named_parameters() if a[1].requires_grad]
148
+ gen_optimizer_class = getattr(optim_module, self.vp_ps.train_parms.gen_optimizer.type)
149
+ gen_optimizer = gen_optimizer_class(gen_params, **self.vp_ps.train_parms.gen_optimizer.args)
150
+
151
+ self.text_logger('Total Trainable Parameters Count in vp_model is %2.2f M.' % (params_count(gen_params) * 1e-6))
152
+
153
+ lr_sched_class = getattr(lr_sched_module, self.vp_ps.train_parms.lr_scheduler.type)
154
+
155
+ gen_lr_scheduler = lr_sched_class(gen_optimizer, **self.vp_ps.train_parms.lr_scheduler.args)
156
+
157
+ schedulers = [
158
+ {
159
+ 'scheduler': gen_lr_scheduler,
160
+ 'monitor': 'val_loss',
161
+ 'interval': 'epoch',
162
+ 'frequency': 1
163
+ },
164
+ ]
165
+ return [gen_optimizer], schedulers
166
+
167
+ def _compute_loss(self, dorig, drec):
168
+ l1_loss = torch.nn.L1Loss(reduction='mean')
169
+ geodesic_loss = geodesic_loss_R(reduction='mean')
170
+
171
+ bs, latentD = drec['poZ_body_mean'].shape
172
+ device = drec['poZ_body_mean'].device
173
+
174
+ loss_kl_wt = self.vp_ps.train_parms.loss_weights.loss_kl_wt
175
+ loss_rec_wt = self.vp_ps.train_parms.loss_weights.loss_rec_wt
176
+ loss_matrot_wt = self.vp_ps.train_parms.loss_weights.loss_matrot_wt
177
+ loss_jtr_wt = self.vp_ps.train_parms.loss_weights.loss_jtr_wt
178
+
179
+ # q_z = torch.distributions.normal.Normal(drec['mean'], drec['std'])
180
+ q_z = drec['q_z']
181
+ # dorig['fullpose'] = torch.cat([dorig['root_orient'], dorig['pose_body']], dim=-1)
182
+
183
+ # Reconstruction loss - L1 on the output mesh
184
+ with torch.no_grad():
185
+ bm_orig = self.bm_train(pose_body=dorig['pose_body'])
186
+
187
+ bm_rec = self.bm_train(pose_body=drec['pose_body'].contiguous().view(bs, -1))
188
+
189
+ v2v = l1_loss(bm_rec.v, bm_orig.v)
190
+
191
+ # KL loss
192
+ p_z = torch.distributions.normal.Normal(
193
+ loc=torch.zeros((bs, latentD), device=device, requires_grad=False),
194
+ scale=torch.ones((bs, latentD), device=device, requires_grad=False))
195
+ weighted_loss_dict = {
196
+ 'loss_kl':loss_kl_wt * torch.mean(torch.sum(torch.distributions.kl.kl_divergence(q_z, p_z), dim=[1])),
197
+ 'loss_mesh_rec': loss_rec_wt * v2v
198
+ }
199
+
200
+ if (self.current_epoch < self.vp_ps.train_parms.keep_extra_loss_terms_until_epoch):
201
+ # breakpoint()
202
+ weighted_loss_dict['matrot'] = loss_matrot_wt * geodesic_loss(drec['pose_body_matrot'].view(-1,3,3), aa2matrot(dorig['pose_body'].view(-1, 3)))
203
+ weighted_loss_dict['jtr'] = loss_jtr_wt * l1_loss(bm_rec.Jtr, bm_orig.Jtr)
204
+
205
+ weighted_loss_dict['loss_total'] = torch.stack(list(weighted_loss_dict.values())).sum()
206
+
207
+ with torch.no_grad():
208
+ unweighted_loss_dict = {'v2v': torch.sqrt(torch.pow(bm_rec.v-bm_orig.v, 2).sum(-1)).mean()}
209
+ unweighted_loss_dict['loss_total'] = torch.cat(
210
+ list({k: v.view(-1) for k, v in unweighted_loss_dict.items()}.values()), dim=-1).sum().view(1)
211
+
212
+ return {'weighted_loss': weighted_loss_dict, 'unweighted_loss': unweighted_loss_dict}
213
+
214
+ def training_step(self, batch, batch_idx, optimizer_idx=None):
215
+
216
+ drec = self(batch['pose_body'].view(-1, 63))
217
+
218
+ loss = self._compute_loss(batch, drec)
219
+
220
+ train_loss = loss['weighted_loss']['loss_total']
221
+
222
+ tensorboard_logs = {'train_loss': train_loss}
223
+ progress_bar = {k: c2c(v) for k, v in loss['weighted_loss'].items()}
224
+ return {'loss': train_loss, 'progress_bar':progress_bar, 'log': tensorboard_logs}
225
+
226
+ def validation_step(self, batch, batch_idx):
227
+
228
+ drec = self(batch['pose_body'].view(-1, 63))
229
+
230
+ loss = self._compute_loss(batch, drec)
231
+ val_loss = loss['unweighted_loss']['loss_total']
232
+
233
+ if self.renderer is not None and self.global_rank == 0 and batch_idx % 500==0 and np.random.rand()>0.5:
234
+ out_fname = makepath(self.work_dir, 'renders/vald_rec_E{:03d}_It{:04d}_val_loss_{:.2f}.png'.format(self.current_epoch, batch_idx, val_loss.item()), isfile=True)
235
+ self.renderer([batch, drec], out_fname = out_fname)
236
+ dgen = self.vp_model.sample_poses(self.vp_ps.logging.num_bodies_to_display)
237
+ out_fname = makepath(self.work_dir, 'renders/vald_gen_E{:03d}_I{:04d}.png'.format(self.current_epoch, batch_idx), isfile=True)
238
+ self.renderer([dgen], out_fname = out_fname)
239
+
240
+
241
+ progress_bar = {'v2v': val_loss}
242
+ return {'val_loss': c2c(val_loss), 'progress_bar': progress_bar, 'log': progress_bar}
243
+
244
+ def validation_epoch_end(self, outputs):
245
+ metrics = {'val_loss': np.nanmean(np.concatenate([v['val_loss'] for v in outputs])) }
246
+
247
+ if self.global_rank == 0:
248
+
249
+ self.text_logger('Epoch {}: {}'.format(self.current_epoch, ', '.join('{}:{:.2f}'.format(k, v) for k, v in metrics.items())))
250
+ self.text_logger('lr is {}'.format([pg['lr'] for opt in self.trainer.optimizers for pg in opt.param_groups]))
251
+
252
+ metrics = {k: torch.as_tensor(v) for k, v in metrics.items()}
253
+
254
+ return {'val_loss': metrics['val_loss'], 'log': metrics}
255
+
256
+
257
+ @rank_zero_only
258
+ def on_train_end(self):
259
+
260
+ self.train_endtime = dt.now().replace(microsecond=0)
261
+ endtime = dt.strftime(self.train_endtime, '%Y_%m_%d_%H_%M_%S')
262
+ elapsedtime = self.train_endtime - self.train_starttime
263
+ self.vp_ps.logging.best_model_fname = self.trainer.checkpoint_callback.best_model_path
264
+
265
+ self.text_logger('Epoch {} - Finished training at {} after {}'.format(self.current_epoch, endtime, elapsedtime))
266
+ self.text_logger('best_model_fname: {}'.format(self.vp_ps.logging.best_model_fname))
267
+
268
+ dump_config(self.vp_ps, osp.join(self.work_dir, '{}_{}.yaml'.format(self.expr_id, self.dataset_id)))
269
+ self.hparams = self.vp_ps.toDict()
270
+
271
+ @rank_zero_only
272
+ def prepare_data(self):
273
+ ''' Similar to the standard AMASS dataset preparation pipeline:
274
+ Download the npz files corresponding to body data from https://amass.is.tue.mpg.de/ and place them under amass_dir
275
+ '''
276
+ self.text_logger = log2file(makepath(self.work_dir, '{}.log'.format(self.expr_id), isfile=True), prefix=self._log_prefix)
277
+
278
+ prepare_vposer_datasets(self.dataset_dir, self.vp_ps.data_parms.amass_splits, self.vp_ps.data_parms.amass_dir, logger=self.text_logger)
279
+
280
+
281
+ def create_expr_message(ps):
282
+ expr_msg = '[{}] batch_size = {}.'.format(ps.general.expr_id, ps.train_parms.batch_size)
283
+
284
+ return expr_msg
285
+
286
+
287
+ def train_vposer_once(_config):
288
+
289
+ resume_training_if_possible = True
290
+
291
+ model = VPoserTrainer(_config)
292
+ model.vp_ps.logging.expr_msg = create_expr_message(model.vp_ps)
293
+ # model.text_logger(model.vp_ps.logging.expr_msg.replace(". ", '.\n'))
294
+ dump_config(model.vp_ps, osp.join(model.work_dir, '{}.yaml'.format(model.expr_id)))
295
+
296
+ logger = TensorBoardLogger(model.work_dir, name='tensorboard')
297
+ lr_monitor = LearningRateMonitor()
298
+
299
+ snapshots_dir = osp.join(model.work_dir, 'snapshots')
300
+ checkpoint_callback = ModelCheckpoint(
301
+ dirpath=makepath(snapshots_dir, isfile=True),
302
+ filename="%s_{epoch:02d}_{val_loss:.2f}" % model.expr_id,
303
+ save_top_k=1,
304
+ verbose=True,
305
+ monitor='val_loss',
306
+ mode='min',
307
+ )
308
+ early_stop_callback = EarlyStopping(**model.vp_ps.train_parms.early_stopping)
309
+
310
+ resume_from_checkpoint = None
311
+ if resume_training_if_possible:
312
+ available_ckpts = sorted(glob.glob(osp.join(snapshots_dir, '*.ckpt')), key=os.path.getmtime)
313
+ if len(available_ckpts)>0:
314
+ resume_from_checkpoint = available_ckpts[-1]
315
+ model.text_logger('Resuming the training from {}'.format(resume_from_checkpoint))
316
+
317
+ trainer = pl.Trainer(gpus=1,
318
+ weights_summary='top',
319
+ distributed_backend = 'ddp',
320
+ # replace_sampler_ddp=False,
321
+ # accumulate_grad_batches=4,
322
+ # profiler=False,
323
+ # overfit_batches=0.05,
324
+ # fast_dev_run = True,
325
+ # limit_train_batches=0.02,
326
+ # limit_val_batches=0.02,
327
+ # num_sanity_val_steps=2,
328
+ plugins=[DDPPlugin(find_unused_parameters=False)],
329
+
330
+ callbacks=[lr_monitor, early_stop_callback, checkpoint_callback],
331
+
332
+ max_epochs=model.vp_ps.train_parms.num_epochs,
333
+ logger=logger,
334
+ resume_from_checkpoint=resume_from_checkpoint
335
+ )
336
+
337
+ trainer.fit(model)
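The resume logic above simply picks the newest checkpoint in the snapshots directory by modification time. A minimal standalone sketch of that pattern (the function and its argument are illustrative, not part of the repo's API):

import glob
import os
import os.path as osp

def find_latest_checkpoint(snapshots_dir):
    # Newest *.ckpt by mtime, or None if training has not produced one yet;
    # this mirrors the resume_training_if_possible branch of train_vposer_once.
    ckpts = sorted(glob.glob(osp.join(snapshots_dir, '*.ckpt')), key=os.path.getmtime)
    return ckpts[-1] if ckpts else None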
mogen/datasets/human_body_prior/visualizations/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
mogen/datasets/human_body_prior/visualizations/training_visualization.py ADDED
@@ -0,0 +1,123 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG),
4
+ # acting on behalf of its Max Planck Institute for Intelligent Systems and the
5
+ # Max Planck Institute for Biological Cybernetics. All rights reserved.
6
+ #
7
+ # Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is holder of all proprietary rights
8
+ # on this computer program. You can only use this computer program if you have closed a license agreement
9
+ # with MPG or you get the right to use the computer program from someone who is authorized to grant you that right.
10
+ # Any use of the computer program without a valid license is prohibited and liable to prosecution.
11
+ # Contact: [email protected]
12
+ #
13
+ #
14
+ # If you use this code in a research publication please consider citing the following:
15
+ #
16
+ # Expressive Body Capture: 3D Hands, Face, and Body from a Single Image <https://arxiv.org/abs/1904.05866>
17
+ #
18
+ #
19
+ # Code Developed by:
20
+ # Nima Ghorbani <https://nghorbani.github.io/>
21
+ #
22
+ # 2020.12.12
23
+
24
+ def pyrenderer(imw=2048, imh=2048):
25
+
26
+ from body_visualizer.mesh.mesh_viewer import MeshViewer
27
+ import cv2
28
+
29
+ import numpy as np
30
+ import trimesh
31
+
32
+ try:
33
+ mv = MeshViewer(width=imw, height=imh, use_offscreen=True)
34
+ except:
35
+ import os
36
+ os.environ['PYOPENGL_PLATFORM'] = 'egl'
37
+ os.environ['EGL_DEVICE_ID'] = os.environ['GPU_DEVICE_ORDINAL'].split(',')[0]
38
+
39
+ mv = MeshViewer(width=imw, height=imh, use_offscreen=True)
40
+
41
+ mv.set_cam_trans([0, -0.5, 2.])
42
+
43
+ def render_an_image(meshes):
44
+ n_all = len(meshes)
45
+ nc = int(np.sqrt(n_all))
46
+
47
+ out_image = np.zeros([1, 1, 1, mv.width, mv.height, 4])
48
+
49
+ scale_percent = 100./nc
50
+ width = int(mv.width * scale_percent / 100)
51
+ height = int(mv.height * scale_percent / 100)
52
+ dim = (width, height)
53
+
54
+ for rId in range(nc):
55
+ for cId in range(nc):
56
+ i = (nc*rId) + cId
57
+ if i >= len(meshes): break  # guard against indexing past the last mesh
58
+
59
+ mesh = meshes[i]
60
+
61
+ # mesh.apply_transform(trimesh.transformations.rotation_matrix(np.radians(-90), (1, 0, 0)))
62
+ mesh.vertices -= np.median(np.array(mesh.vertices), axis=0)
63
+ mv.set_dynamic_meshes([mesh])
64
+ img = mv.render(render_wireframe=False, RGBA=True)
65
+ img_resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
66
+
67
+ out_image[0, 0, 0, (rId*width):((rId+1)*width), (cId*height):((cId+1)*height)] = cv2.cvtColor(img_resized, cv2.COLOR_BGRA2RGBA)
68
+
69
+ return out_image.astype(np.uint8)
70
+
71
+ return render_an_image
72
+
73
+ def vposer_trainer_renderer(bm, num_bodies_to_display=5):
74
+ import numpy as np
75
+ import trimesh
76
+ import torch
77
+
78
+ from body_visualizer.tools.vis_tools import imagearray2file, colors
79
+ from human_body_prior.tools.omni_tools import copy2cpu as c2c
80
+ from human_body_prior.tools.omni_tools import makepath
81
+ from trimesh import Trimesh as Mesh
82
+ from trimesh.util import concatenate as mesh_cat
83
+
84
+ renderer = pyrenderer(1024, 1024)
85
+
86
+ faces = c2c(bm.f)
87
+
88
+ def render_once(body_parms, body_colors=[colors['grey'], colors['brown-light']], out_fname=None):
89
+ '''
90
+
91
+ :param body_parms: list of dictionaries of body parameters.
92
+ :param body_colors: list of np arrays of color rgb values
93
+ :param out_fname: optional output path for the rendered image/video file
94
+ :return:
95
+ '''
96
+
97
+ if out_fname is not None: makepath(out_fname, isfile=True)
98
+ assert len(body_parms) <= len(body_colors), ValueError('Not enough colors provided for #{} body_parms'.format(len(body_parms)))
99
+
100
+ bs = body_parms[0]['pose_body'].shape[0]
101
+
102
+ body_ids = np.random.choice(bs, num_bodies_to_display)
103
+
104
+ body_evals = [c2c(bm(root_orient=v['root_orient'].view(bs, -1) if 'root_orient' in v else torch.zeros(bs, 3).type_as(v['pose_body']),
105
+ pose_body=v['pose_body'].contiguous().view(bs, -1)).v) for v in body_parms]
106
+ num_verts = body_evals[0].shape[1]
107
+
108
+ render_meshes = []
109
+ for bId in body_ids:
110
+ concat_cur_meshes = None
111
+ for body, body_color in zip(body_evals, body_colors):
112
+ cur_body_mesh = Mesh(body[bId], faces, vertex_colors=np.ones([num_verts, 3]) * body_color)
113
+ concat_cur_meshes = cur_body_mesh if concat_cur_meshes is None else mesh_cat(concat_cur_meshes, cur_body_mesh)
114
+ render_meshes.append(concat_cur_meshes)
115
+
116
+ img = renderer(render_meshes)
117
+
118
+ if out_fname is not None: imagearray2file(img, out_fname, fps=10)
119
+
120
+
121
+ return
122
+
123
+ return render_once
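render_an_image above tiles one render per mesh into an nc x nc grid after resizing each render to a single cell. A self-contained sketch of that tiling step with plain numpy (names are illustrative; this sketch uses ceil so no mesh is dropped when the count is not a perfect square):

import numpy as np

def tile_rgba_images(images, cell_size):
    # Place each (cell_size, cell_size, 4) RGBA image on an nc x nc canvas.
    nc = int(np.ceil(np.sqrt(len(images))))
    canvas = np.zeros((nc * cell_size, nc * cell_size, 4), dtype=np.uint8)
    for i, img in enumerate(images):
        r, c = divmod(i, nc)
        canvas[r * cell_size:(r + 1) * cell_size,
               c * cell_size:(c + 1) * cell_size] = img
    return canvas

# e.g. four 256x256 renders -> one 512x512 sheet:
# sheet = tile_rgba_images([np.zeros((256, 256, 4), np.uint8)] * 4, 256)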
mogen/datasets/motionverse_dataset.py ADDED
@@ -0,0 +1,828 @@
1
+ import copy
2
+ import os
3
+ import pickle as pkl
4
+ from typing import Optional, Union, List
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ import json
10
+ from torch.utils.data import ConcatDataset, Dataset, WeightedRandomSampler
11
+ from .builder import DATASETS
12
+ from .pipelines import Compose, RetargetSkeleton
13
+ import random
14
+ import pytorch3d.transforms as geometry
15
+ from scipy.ndimage import gaussian_filter
16
+ # from mogen.core.evaluation import build_evaluator
17
+ # from mogen.core.evaluation.utils import compute_similarity_transform, transform_pose_sequence
18
+ from mogen.models.builder import build_submodule
19
+ from .utils import copy_repr_data, extract_repr_data, move_repr_data, recover_from_ric
20
+
21
+ class SingleMotionVerseDataset(Dataset):
22
+ """
23
+ A dataset class for handling a single MotionVerse dataset.
24
+
25
+ Args:
26
+ dataset_path (str): Name of the dataset to load.
+ task_name (str): Name of the task. Choices: ['mocap', 't2m', 'v2m', 's2g', 'm2d'].
27
+ data_prefix (str): Path to the directory containing the dataset.
28
+ ann_file (str): Path to the annotation file.
29
+ pipeline (list): A list of transformations to apply on the data.
30
+ test_mode (bool): Whether the dataset is used for evaluation. Defaults to False.
31
+ eval_cfg (dict): Configuration for evaluation metrics.
32
+ """
33
+
34
+ def __init__(self,
35
+ dataset_path: Optional[str] = None,
36
+ task_name: Optional[str] = None,
37
+ data_prefix: Optional[str] = None,
38
+ ann_file: Optional[str] = None,
39
+ pipeline: Optional[List[dict]] = None,
40
+
41
+ # for text2motion and speech2gesture
42
+ tgt_min_motion_length: int = 20,
43
+ tgt_max_motion_length: int = 200,
44
+
45
+ # for video2motion
46
+ v2m_window_size: int = 20,
47
+
48
+ # for motion prediction
49
+ mp_input_length: int = 50,
50
+ mp_output_length: int = 25,
51
+ mp_stride_step: int = 5,
52
+
53
+ # for general test
54
+ test_rotation_type: str = 'h3d_rot',
55
+ target_framerate: float = 20,
56
+ eval_cfg: Optional[dict] = None,
57
+ test_mode: Optional[bool] = False):
58
+ data_prefix = os.path.join(data_prefix, 'datasets', dataset_path)
59
+ self.dataset_path = dataset_path
60
+ assert task_name in ['mocap', 't2m', 'v2m', 's2g', 'm2d']
61
+ self.task_name = task_name
62
+ self.dataset_name = dataset_path + '_' + task_name
63
+
64
+ # define subdirectories
65
+ self.meta_dir = os.path.join(data_prefix, 'metas')
66
+ self.motion_dir = os.path.join(data_prefix, 'motions')
67
+ self.eval_motion_dir = os.path.join(data_prefix, 'eval_motions')
68
+ self.text_dir = os.path.join(data_prefix, 'texts')
69
+ self.text_feat_dir = os.path.join(data_prefix, 'text_feats')
70
+ self.speech_dir = os.path.join(data_prefix, 'speeches')
71
+ self.speech_feat_dir = os.path.join(data_prefix, 'speech_feats')
72
+ self.music_dir = os.path.join(data_prefix, 'musics')
73
+ self.music_feat_dir = os.path.join(data_prefix, 'music_feats')
74
+ self.video_feat_dir = os.path.join(data_prefix, 'video_feats')
75
+ self.anno_file = os.path.join(data_prefix, 'splits', ann_file)
76
+
77
+ self.pipeline = Compose(pipeline)
78
+
79
+ self.tgt_min_motion_length = tgt_min_motion_length
80
+ self.tgt_max_motion_length = tgt_max_motion_length
81
+
82
+ self.v2m_window_size = v2m_window_size
83
+
84
+ self.mp_input_length = mp_input_length
85
+ self.mp_output_length = mp_output_length
86
+ self.mp_stride_step = mp_stride_step
87
+
88
+ self.target_framerate = target_framerate
89
+ self.test_rotation_type = test_rotation_type
90
+ self.test_mode = test_mode
91
+ self.load_annotations()
92
+ self.eval_cfg = copy.deepcopy(eval_cfg)
93
+ if self.test_mode:
94
+ self.prepare_evaluation()
95
+
96
+ def __len__(self) -> int:
97
+ """Return the length of the current dataset."""
98
+ if self.test_mode:
99
+ return len(self.eval_indexes)
100
+ return len(self.name_list)
101
+
102
+ def __getitem__(self, idx: int) -> dict:
103
+ """Prepare data for the given index."""
104
+ if self.test_mode:
105
+ idx = self.eval_indexes[idx]
106
+ return self.prepare_data(idx)
107
+
108
+ def load_annotations(self):
109
+ if self.task_name == 'mocap':
110
+ self.load_annotations_mocap()
111
+ elif self.task_name == 't2m':
112
+ self.load_annotations_t2m()
113
+ elif self.task_name == 'v2m':
114
+ self.load_annotations_v2m()
115
+ elif self.task_name == 's2g':
116
+ self.load_annotations_s2g()
117
+ elif self.task_name == 'm2d':
118
+ self.load_annotations_m2d()
119
+ else:
120
+ raise NotImplementedError()
121
+
122
+ def load_annotations_mocap(self):
123
+ if self.test_mode:
124
+ self.name_list = []
125
+ self.src_start_frame = []
126
+ self.src_end_frame = []
127
+ self.tgt_start_frame = []
128
+ self.tgt_end_frame = []
129
+ tgt_motion_length = self.mp_input_length + self.mp_output_length
130
+ for name in open(self.anno_file):
131
+ name = name.strip()
132
+ meta_path = os.path.join(self.meta_dir, name + ".json")
133
+ meta_data = json.load(open(meta_path))
134
+ num_frames = meta_data['num_frames']
135
+ downrate = int(meta_data['framerate'] / self.target_framerate + 0.1)
136
+ if num_frames < (self.mp_input_length + self.mp_output_length) * downrate:
137
+ continue
138
+ lim = num_frames // downrate - tgt_motion_length
139
+ for start_frame in range(0, lim, self.mp_stride_step):
140
+ self.name_list.append(name)
141
+ self.src_start_frame.append((start_frame + 1) * downrate)
142
+ self.src_end_frame.append((start_frame + tgt_motion_length + 1) * downrate)
143
+ self.tgt_start_frame.append(start_frame + self.mp_input_length)
144
+ self.tgt_end_frame.append(start_frame + tgt_motion_length)
145
+ else:
146
+ self.name_list = []
147
+ for name in open(self.anno_file):
148
+ name = name.strip()
149
+ self.name_list.append(name)
150
+
151
+ def load_annotations_t2m(self):
152
+ self.name_list = []
153
+ self.text_idx = []
154
+ for name in open(self.anno_file):
155
+ name = name.strip()
156
+ meta_path = os.path.join(self.meta_dir, name + ".json")
157
+ meta_data = json.load(open(meta_path))
158
+ downrate = int(meta_data['framerate'] / self.target_framerate + 0.1)
159
+ text_path = os.path.join(self.text_dir, name + ".json")
160
+ text_data = json.load(open(text_path))
161
+ for i, anno in enumerate(text_data):
162
+ start_frame = anno['start_frame'] // downrate
163
+ end_frame = min(anno['end_frame'], meta_data['num_frames']) // downrate
164
+ num_frame = end_frame - start_frame
165
+ if num_frame < self.tgt_min_motion_length or num_frame > self.tgt_max_motion_length:
166
+ continue
167
+ if len(anno['body_text']) > 0:
168
+ self.name_list.append(name)
169
+ self.text_idx.append(i)
170
+
171
+ def load_annotations_v2m(self):
172
+ if not self.test_mode:
173
+ self.name_list = []
174
+ for name in open(self.anno_file):
175
+ name = name.strip()
176
+ self.name_list.append(name)
177
+ else:
178
+ self.name_list = []
179
+ self.start_frame = []
180
+ self.end_frame = []
181
+ self.valid_start_frame = []
182
+ self.valid_end_frame = []
183
+ for name in open(self.anno_file):
184
+ name = name.strip()
185
+ meta_path = os.path.join(self.meta_dir, name + ".json")
186
+ meta_data = json.load(open(meta_path))
187
+ num_frames = meta_data['num_frames']
188
+ assert num_frames >= self.v2m_window_size
189
+ cur_idx = 0
190
+ while cur_idx < num_frames:
191
+ if cur_idx + self.v2m_window_size < num_frames:
192
+ self.name_list.append(name)
193
+ self.start_frame.append(cur_idx)
194
+ self.end_frame.append(cur_idx + self.v2m_window_size)
195
+ self.valid_start_frame.append(cur_idx)
196
+ self.valid_end_frame.append(cur_idx + self.v2m_window_size)
197
+ cur_idx += self.v2m_window_size
198
+ else:
199
+ self.name_list.append(name)
200
+ self.start_frame.append(num_frames - self.v2m_window_size)
201
+ self.end_frame.append(num_frames)
202
+ self.valid_start_frame.append(cur_idx)
203
+ self.valid_end_frame.append(num_frames)
204
+ break
205
+
206
+ def load_annotations_s2g(self):
207
+ self.name_list = []
208
+ self.speech_idx = []
209
+ for name in open(self.anno_file):
210
+ name = name.strip()
211
+ meta_path = os.path.join(self.meta_dir, name + ".json")
212
+ meta_data = json.load(open(meta_path))
213
+ downrate = int(meta_data['framerate'] / self.target_framerate + 0.1)
214
+ speech_path = os.path.join(self.speech_dir, name + ".json")
215
+ speech_data = json.load(open(speech_path))
216
+ for i, anno in enumerate(speech_data):
217
+ start_frame = anno['start_frame'] // downrate
218
+ end_frame = min(anno['end_frame'], meta_data['num_frames']) // downrate
219
+ num_frame = end_frame - start_frame
220
+ if num_frame < self.tgt_min_motion_length or num_frame > self.tgt_max_motion_length:
221
+ continue
222
+ self.name_list.append(name)
223
+ self.speech_idx.append(i)
224
+
225
+ def load_annotations_m2d(self):
226
+ self.name_list = []
227
+ self.music_idx = []
228
+ for name in open(self.anno_file):
229
+ name = name.strip()
230
+ meta_path = os.path.join(self.meta_dir, name + ".json")
231
+ meta_data = json.load(open(meta_path))
232
+ downrate = int(meta_data['framerate'] / self.target_framerate + 0.1)
233
+ music_path = os.path.join(self.music_dir, name + ".json")
234
+ music_data = json.load(open(music_path))
235
+ for i, anno in enumerate(music_data):
236
+ start_frame = anno['start_frame'] // downrate
237
+ end_frame = min(anno['end_frame'], meta_data['num_frames']) // downrate
238
+ num_frame = end_frame - start_frame
239
+ if num_frame < self.tgt_min_motion_length or num_frame > self.tgt_max_motion_length:
240
+ continue
241
+ self.name_list.append(name)
242
+ self.music_idx.append(i)
243
+
244
+ def prepare_data_base(self, idx: int) -> dict:
245
+ results = {}
246
+ name = self.name_list[idx]
247
+ results['motion_path'] = os.path.join(self.motion_dir, name + ".npz")
248
+ meta_path = os.path.join(self.meta_dir, name + ".json")
249
+ meta_data = json.load(open(meta_path))
250
+ meta_data['dataset_name'] = self.dataset_name
251
+ results['meta_data'] = meta_data
252
+ results['meta_data']['sample_idx'] = idx
253
+ results.update({
254
+ 'text_word_feat': np.zeros((77, 1024)).astype(np.float32),
255
+ 'text_seq_feat': np.zeros((1024)).astype(np.float32),
256
+ 'text_cond': 0,
257
+ 'music_word_feat': np.zeros((229, 768)).astype(np.float32),
258
+ 'music_seq_feat': np.zeros((1024)).astype(np.float32),
259
+ 'music_cond': 0,
260
+ 'speech_word_feat': np.zeros((229, 768)).astype(np.float32),
261
+ 'speech_seq_feat': np.zeros((1024)).astype(np.float32),
262
+ 'speech_cond': 0,
263
+ 'video_seq_feat': np.zeros((1024)).astype(np.float32),
264
+ 'video_cond': 0,
265
+ })
266
+ return results
267
+
268
+ def prepare_data(self, idx: int) -> dict:
269
+ if self.task_name == 'mocap':
270
+ results = self.prepare_data_mocap(idx)
271
+ elif self.task_name == 't2m':
272
+ results = self.prepare_data_t2m(idx)
273
+ elif self.task_name == 'v2m':
274
+ results = self.prepare_data_v2m(idx)
275
+ elif self.task_name == 's2g':
276
+ results = self.prepare_data_s2g(idx)
277
+ elif self.task_name == 'm2d':
278
+ results = self.prepare_data_m2d(idx)
279
+ else:
280
+ raise NotImplementedError()
281
+ results = self.pipeline(results)
282
+ return results
283
+
284
+ def prepare_data_mocap(self, idx: int) -> dict:
285
+ results = self.prepare_data_base(idx)
286
+ if self.test_mode:
287
+ results['meta_data']['start_frame'] = self.src_start_frame[idx]
288
+ results['meta_data']['end_frame'] = self.src_end_frame[idx]
289
+ results['context_mask'] = np.concatenate(
290
+ (np.ones((self.mp_input_length - 1)), np.zeros((self.mp_output_length))),
291
+ axis=-1
292
+ )
293
+ return results
294
+
295
+ def prepare_data_t2m(self, idx: int) -> dict:
296
+ results = self.prepare_data_base(idx)
297
+ name = self.name_list[idx]
298
+ text_idx = self.text_idx[idx]
299
+ text_path = os.path.join(self.text_dir, name + ".json")
300
+ text_data = json.load(open(text_path))[text_idx]
301
+ text_feat_path = os.path.join(self.text_feat_dir, name + ".pkl")
302
+ text_feat_data = pkl.load(open(text_feat_path, "rb"))
303
+ text_list = text_data['body_text']
304
+ tid = np.random.randint(len(text_list))
305
+ text = text_list[tid]
306
+ text_word_feat = text_feat_data['text_word_feats'][text_idx][tid]
307
+ text_seq_feat = text_feat_data['text_seq_feats'][text_idx][tid]
308
+ assert text_word_feat.shape[0] == 77
309
+ assert text_word_feat.shape[1] == 1024
310
+ assert text_seq_feat.shape[0] == 1024
311
+
312
+ if self.test_mode:
313
+ motion_path = os.path.join(self.eval_motion_dir, name + ".npy")
314
+ motion_data = np.load(motion_path)
315
+ assert not np.isnan(motion_data).any()
316
+ downrate = int(results['meta_data']['framerate'] / self.target_framerate + 0.1)
317
+ start_frame = text_data['start_frame'] // downrate
318
+ end_frame = text_data['end_frame'] // downrate
319
+ motion_data = motion_data[start_frame: end_frame]
320
+ results['meta_data']['framerate'] = self.target_framerate
321
+ results['meta_data']['rotation_type'] = self.test_rotation_type
322
+ assert motion_data.shape[0] > 0
323
+ if 'body_tokens' in text_data:
324
+ token = text_data['body_tokens'][tid]
325
+ else:
326
+ token = ""
327
+ text_cond = 1
328
+ results.update({
329
+ 'motion': motion_data,
330
+ 'text_word_feat': text_word_feat,
331
+ 'text_seq_feat': text_seq_feat,
332
+ 'text_cond': text_cond,
333
+ 'text': text,
334
+ 'token': token
335
+ })
336
+ else:
337
+ results['meta_data']['start_frame'] = text_data['start_frame']
338
+ results['meta_data']['end_frame'] = text_data['end_frame']
339
+ text_cond = 1
340
+ results.update({
341
+ 'text_word_feat': text_word_feat,
342
+ 'text_seq_feat': text_seq_feat,
343
+ 'text_cond': text_cond
344
+ })
345
+ return results
346
+
347
+ def prepare_data_v2m(self, idx: int) -> dict:
348
+ results = self.prepare_data_base(idx)
349
+ name = self.name_list[idx]
350
+ video_feat_path = os.path.join(self.video_feat_dir, name + ".pkl")
351
+ video_feat_data = pkl.load(open(video_feat_path, "rb"))
352
+ video_word_feat = video_feat_data['video_word_feats']
353
+ video_seq_feat = video_feat_data['video_seq_feats']
354
+ assert video_word_feat.shape[0] == results['meta_data']['num_frames']
355
+ assert video_word_feat.shape[1] == 1024
356
+ assert video_seq_feat.shape[0] == 1024
357
+ video_cond = 1
358
+ if self.test_mode:
359
+ results['meta_data']['start_frame'] = self.start_frame[idx]
360
+ results['meta_data']['end_frame'] = self.end_frame[idx]
361
+ motion_path = os.path.join(self.eval_motion_dir, name + ".npy")
362
+ motion_data = np.load(motion_path)
363
+ assert not np.isnan(motion_data).any()
364
+
365
+ start_frame = self.start_frame[idx]
366
+ end_frame = self.end_frame[idx]
367
+ motion_data = motion_data[start_frame: end_frame]
368
+ video_word_feat = video_word_feat[start_frame: end_frame]
369
+ results['meta_data']['framerate'] = self.target_framerate
370
+ results['meta_data']['rotation_type'] = self.test_rotation_type
371
+ assert motion_data.shape[0] > 0
372
+ results.update({
373
+ 'motion': motion_data,
374
+ 'video_word_feat': video_word_feat,
375
+ 'video_seq_feat': video_seq_feat,
376
+ 'video_cond': video_cond
377
+ })
378
+ else:
379
+ results.update({
380
+ 'video_word_feat': video_word_feat,
381
+ 'video_seq_feat': video_seq_feat,
382
+ 'video_cond': video_cond
383
+ })
384
+ return results
385
+
386
+ def prepare_data_s2g(self, idx: int) -> dict:
387
+ results = self.prepare_data_base(idx)
388
+ name = self.name_list[idx]
389
+ speech_idx = self.speech_idx[idx]
390
+ speech_path = os.path.join(self.speech_dir, name + ".json")
391
+ speech_data = json.load(open(speech_path))[speech_idx]
392
+ speech_feat_path = os.path.join(self.speech_feat_dir, name + ".pkl")
393
+ speech_feat_data = pkl.load(open(speech_feat_path, "rb"))
394
+ try:
395
+ speech_word_feat = speech_feat_data['audio_word_feats'][speech_idx]
396
+ speech_seq_feat = speech_feat_data['audio_seq_feats'][speech_idx]
397
+ except KeyError:
398
+ speech_word_feat = speech_feat_data['speech_word_feats'][speech_idx]
399
+ speech_seq_feat = speech_feat_data['speech_seq_feats'][speech_idx]
400
+ del speech_feat_data
401
+ assert speech_word_feat.shape[0] == 229
402
+ assert speech_word_feat.shape[1] == 768
403
+ assert speech_seq_feat.shape[0] == 1024
404
+
405
+ results['meta_data']['start_frame'] = speech_data['start_frame']
406
+ results['meta_data']['end_frame'] = speech_data['end_frame']
407
+ speech_cond = 1
408
+ results.update({
409
+ 'speech_word_feat': speech_word_feat,
410
+ 'speech_seq_feat': speech_seq_feat,
411
+ 'speech_cond': speech_cond
412
+ })
413
+ if self.test_mode:
414
+ results['meta_data']['framerate'] = self.target_framerate
415
+ results['meta_data']['rotation_type'] = self.test_rotation_type
416
+ eval_data_path = os.path.join(self.eval_motion_dir, name + ".npz")
417
+ eval_data = np.load(eval_data_path)
418
+ motion_data = eval_data["bvh_rot_beat141"]
419
+ sem_data = eval_data["sem"]
420
+ wav_data = eval_data["wave16k"]
421
+ assert not np.isnan(motion_data).any()
422
+
423
+ start_frame = results['meta_data']['start_frame']
424
+ end_frame = results['meta_data']['end_frame']
425
+ wav_start_frame = int(start_frame / results['meta_data']['framerate'] * 16000)
426
+ wav_end_frame = int(end_frame / results['meta_data']['framerate'] * 16000)
427
+ motion_data = motion_data[start_frame: end_frame]
428
+ sem_data = sem_data[start_frame: end_frame]
429
+ wav_data = wav_data[wav_start_frame: wav_end_frame]
430
+ assert motion_data.shape[0] > 0
431
+ results.update({
432
+ 'motion': motion_data,
433
+ 'sem_score': sem_data,
434
+ 'wav_feat': wav_data
435
+ })
436
+ return results
437
+
438
+ def prepare_data_m2d(self, idx: int) -> dict:
439
+ results = self.prepare_data_base(idx)
440
+ name = self.name_list[idx]
441
+ music_idx = self.music_idx[idx]
442
+ music_path = os.path.join(self.music_dir, name + ".json")
443
+ music_data = json.load(open(music_path))[music_idx]
444
+ music_feat_path = os.path.join(self.music_feat_dir, name + ".pkl")
445
+ music_feat_data = pkl.load(open(music_feat_path, "rb"))
446
+ music_word_feat = music_feat_data['audio_word_feats'][music_idx]
447
+ music_seq_feat = music_feat_data['audio_seq_feats'][music_idx]
448
+ assert music_word_feat.shape[0] == 229
449
+ assert music_word_feat.shape[1] == 768
450
+ assert music_seq_feat.shape[0] == 1024
451
+
452
+ results['meta_data']['start_frame'] = music_data['start_frame']
453
+ results['meta_data']['end_frame'] = music_data['end_frame']
454
+ music_cond = 1
455
+ results.update({
456
+ 'music_word_feat': music_word_feat,
457
+ 'music_seq_feat': music_seq_feat,
458
+ 'music_cond': music_cond
459
+ })
460
+ return results
461
+
462
+ def prepare_evaluation(self):
463
+ """
464
+ Prepare the dataset for evaluation by initializing evaluators and creating evaluation indexes.
465
+ """
466
+ self.evaluators = []
467
+ self.eval_indexes = []
468
+ self.evaluator_model = build_submodule(self.eval_cfg.get('evaluator_model', None))
469
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
470
+ if self.evaluator_model is not None:
471
+ self.evaluator_model = self.evaluator_model.to(device)
472
+ self.evaluator_model.eval()
473
+ self.eval_cfg['evaluator_model'] = self.evaluator_model
474
+
475
+ for _ in range(self.eval_cfg['replication_times']):
476
+ eval_indexes = np.arange(len(self.name_list))
477
+ if self.eval_cfg.get('shuffle_indexes', False):
478
+ np.random.shuffle(eval_indexes)
479
+ self.eval_indexes.append(eval_indexes)
480
+
481
+ for metric in self.eval_cfg['metrics']:
482
+ evaluator, self.eval_indexes = build_evaluator(
483
+ metric, self.eval_cfg, len(self.name_list), self.eval_indexes)
484
+ self.evaluators.append(evaluator)
485
+
486
+ self.eval_indexes = np.concatenate(self.eval_indexes)
487
+
488
+ def process_outputs(self, results):
489
+ return results
490
+
491
+ def evaluate(self, results: List[dict], work_dir: str, logger=None) -> dict:
492
+ """
493
+ Evaluate the model performance based on the results.
494
+
495
+ Args:
496
+ results (list): A list of result dictionaries.
497
+ work_dir (str): Directory where evaluation logs will be stored.
498
+ logger: Logger object to record evaluation results (optional).
499
+
500
+ Returns:
501
+ dict: Dictionary containing evaluation metrics.
502
+ """
503
+ metrics = {}
504
+ results = self.process_outputs(results)
505
+ for evaluator in self.evaluators:
506
+ metrics.update(evaluator.evaluate(results))
507
+ if logger is not None:
508
+ logger.info(metrics)
509
+ eval_output = os.path.join(work_dir, 'eval_results.log')
510
+ with open(eval_output, 'w') as f:
511
+ for k, v in metrics.items():
512
+ f.write(k + ': ' + str(v) + '\n')
513
+ return metrics
514
+
515
+
516
+ def create_single_dataset(cfg: dict):
517
+ dataset_path = cfg['dataset_path']
518
+ if dataset_path == 'amass':
519
+ return MotionVerseAMASS(**cfg)
520
+ elif dataset_path == 'humanml3d':
521
+ return MotionVerseH3D(**cfg)
522
+ elif dataset_path == 'kitml':
523
+ return MotionVerseKIT(**cfg)
524
+ elif dataset_path == 'babel':
525
+ return MotionVerseBABEL(**cfg)
526
+ elif dataset_path == 'motionx':
527
+ return MotionVerseMotionX(**cfg)
528
+ elif dataset_path == 'humanact12':
529
+ return MotionVerseACT12(**cfg)
530
+ elif dataset_path == 'uestc':
531
+ return MotionVerseUESTC(**cfg)
532
+ elif dataset_path == 'ntu':
533
+ return MotionVerseNTU(**cfg)
534
+ elif dataset_path == 'h36m':
535
+ return MotionVerseH36M(**cfg)
536
+ elif dataset_path == 'mpi':
537
+ return MotionVerseMPI(**cfg)
538
+ elif dataset_path == 'pw3d':
539
+ return MotionVersePW3D(**cfg)
540
+ elif dataset_path == 'aist':
541
+ return MotionVerseAIST(**cfg)
542
+ elif dataset_path == 'beat':
543
+ return MotionVerseBEAT(**cfg)
544
+ elif dataset_path == 'tedg':
545
+ return MotionVerseTEDG(**cfg)
546
+ elif dataset_path == 'tedex':
547
+ return MotionVerseTEDEx(**cfg)
548
+ elif dataset_path == 's2g3d':
549
+ return MotionVerseS2G3D(**cfg)
550
+ else:
551
+ raise NotImplementedError()
552
+
553
+
554
+ @DATASETS.register_module()
555
+ class MotionVerse(Dataset):
556
+ """
557
+ A dataset class that handles multiple MotionVerse datasets.
558
+
559
+ Args:
560
+ dataset_cfgs (list[str]): List of dataset configurations.
561
+ partitions (list[float]): List of partition weights corresponding to the datasets.
562
+ num_data (Optional[int]): Number of data samples to load. Defaults to None.
563
+ data_prefix (str): Path to the directory containing the dataset.
564
+ """
565
+
566
+ def __init__(self,
567
+ dataset_cfgs: List[dict],
568
+ partitions: List[float],
569
+ num_data: Optional[int] = None,
570
+ data_prefix: Optional[str] = None):
571
+ """Load data from multiple datasets."""
572
+ assert min(partitions) >= 0
573
+ assert len(dataset_cfgs) == len(partitions)
574
+ datasets = []
575
+ new_partitions = []
576
+ for idx, cfg in enumerate(dataset_cfgs):
577
+ if partitions[idx] == 0:
578
+ continue
579
+ new_partitions.append(partitions[idx])
580
+ cfg.update({
581
+ 'data_prefix': data_prefix
582
+ })
583
+ datasets.append(create_single_dataset(cfg))
584
+ self.dataset = ConcatDataset(datasets)
585
+ if num_data is not None:
586
+ self.length = num_data
587
+ else:
588
+ self.length = max(len(ds) for ds in datasets)
589
+ partitions = new_partitions
590
+ weights = [np.ones(len(ds)) * p / len(ds) for (p, ds) in zip(partitions, datasets)]
591
+ weights = np.concatenate(weights, axis=0)
592
+ self.weights = weights
593
+ self.task_proj = {
594
+ 'mocap': 0,
595
+ 't2m': 1,
596
+ 'v2m': 2,
597
+ 's2g': 3,
598
+ 'm2d': 4
599
+ }
600
+ self.task_idx_list = []
601
+ for ds in datasets:
602
+ self.task_idx_list += [self.task_proj[ds.task_name]] * len(ds)
603
+
604
+ def __len__(self) -> int:
605
+ """Get the size of the dataset."""
606
+ return self.length
607
+
608
+ def __getitem__(self, idx: int) -> dict:
609
+ """Given an index, sample data from multiple datasets with the specified proportion."""
610
+ return self.dataset[idx]
611
+
612
+ def get_task_idx(self, idx: int) -> int:
613
+ return self.task_idx_list[idx]
614
+
615
+
616
+ @DATASETS.register_module()
617
+ class MotionVerseEval(Dataset):
618
+
619
+ def __init__(self,
620
+ eval_cfgs: dict,
621
+ testset: str,
622
+ test_mode: bool = True):
623
+ """Load data from multiple datasets."""
624
+ assert testset in eval_cfgs
625
+ dataset_path, task_name = testset.split('_')
626
+ dataset_cfg = eval_cfgs[testset]
627
+ dataset_cfg['dataset_path'] = dataset_path
628
+ dataset_cfg['task_name'] = task_name
629
+ dataset_cfg['test_mode'] = test_mode
630
+ self.dataset = create_single_dataset(dataset_cfg)
631
+
632
+ def __len__(self) -> int:
633
+ return len(self.dataset)
634
+
635
+ def __getitem__(self, idx: int) -> dict:
636
+ return self.dataset[idx]
637
+
638
+ def load_annotation(self):
639
+ self.dataset.load_annotation()
640
+
641
+ def prepare_data(self, idx: int) -> dict:
642
+ return self.dataset.prepare_data(idx)
643
+
644
+ def prepare_evaluation(self):
645
+ self.dataset.prepare_evaluation()
646
+
647
+ def process_outputs(self, results):
648
+ return self.dataset.process_outputs(results)
649
+
650
+ def evaluate(self, results: List[dict], work_dir: str, logger=None) -> dict:
651
+ return self.dataset.evaluate(results=results, work_dir=work_dir, logger=logger)
652
+
653
+
654
+ @DATASETS.register_module()
655
+ class MotionVerseAMASS(SingleMotionVerseDataset):
656
+
657
+ def __init__(self, **kwargs):
658
+ if 'dataset_path' not in kwargs:
659
+ kwargs['dataset_path'] = 'amass'
660
+ task_name = kwargs['task_name']
661
+ assert task_name in ['mocap']
662
+ super().__init__(**kwargs)
663
+
664
+
665
+ @DATASETS.register_module()
666
+ class MotionVerseH3D(SingleMotionVerseDataset):
667
+
668
+ def __init__(self, **kwargs):
669
+ if 'dataset_path' not in kwargs:
670
+ kwargs['dataset_path'] = 'humanml3d'
671
+ task_name = kwargs['task_name']
672
+ assert task_name in ['mocap', 't2m']
673
+ super().__init__(**kwargs)
674
+
675
+
676
+ @DATASETS.register_module()
677
+ class MotionVerseKIT(SingleMotionVerseDataset):
678
+
679
+ def __init__(self, **kwargs):
680
+ if 'dataset_path' not in kwargs:
681
+ kwargs['dataset_path'] = 'kitml'
682
+ task_name = kwargs['task_name']
683
+ assert task_name in ['mocap', 't2m']
684
+ super().__init__(**kwargs)
685
+
686
+
687
+ @DATASETS.register_module()
688
+ class MotionVerseBABEL(SingleMotionVerseDataset):
689
+
690
+ def __init__(self, **kwargs):
691
+ if 'dataset_path' not in kwargs:
692
+ kwargs['dataset_path'] = 'babel'
693
+ task_name = kwargs['task_name']
694
+ assert task_name in ['mocap', 't2m']
695
+ super().__init__(**kwargs)
696
+
697
+
698
+ @DATASETS.register_module()
699
+ class MotionVerseMotionX(SingleMotionVerseDataset):
700
+
701
+ def __init__(self, **kwargs):
702
+ if 'dataset_path' not in kwargs:
703
+ kwargs['dataset_path'] = 'motionx'
704
+ task_name = kwargs['task_name']
705
+ assert task_name in ['mocap', 't2m']
706
+ super().__init__(**kwargs)
707
+
708
+
709
+ @DATASETS.register_module()
710
+ class MotionVerseACT12(SingleMotionVerseDataset):
711
+
712
+ def __init__(self, **kwargs):
713
+ if 'dataset_path' not in kwargs:
714
+ kwargs['dataset_path'] = 'humanact12'
715
+ task_name = kwargs['task_name']
716
+ assert task_name in ['mocap', 't2m']
717
+ super().__init__(**kwargs)
718
+
719
+
720
+ @DATASETS.register_module()
721
+ class MotionVerseUESTC(SingleMotionVerseDataset):
722
+
723
+ def __init__(self, **kwargs):
724
+ if 'dataset_path' not in kwargs:
725
+ kwargs['dataset_path'] = 'uestc'
726
+ task_name = kwargs['task_name']
727
+ assert task_name in ['mocap', 't2m']
728
+ super().__init__(**kwargs)
729
+
730
+
731
+ @DATASETS.register_module()
732
+ class MotionVerseNTU(SingleMotionVerseDataset):
733
+
734
+ def __init__(self, **kwargs):
735
+ if 'dataset_path' not in kwargs:
736
+ kwargs['dataset_path'] = 'ntu'
737
+ task_name = kwargs['task_name']
738
+ assert task_name in ['mocap', 't2m']
739
+ super().__init__(**kwargs)
740
+
741
+
742
+ @DATASETS.register_module()
743
+ class MotionVerseH36M(SingleMotionVerseDataset):
744
+
745
+ def __init__(self, **kwargs):
746
+ if 'dataset_path' not in kwargs:
747
+ kwargs['dataset_path'] = 'h36m'
748
+ task_name = kwargs['task_name']
749
+ assert task_name in ['mocap', 'v2m']
750
+ super().__init__(**kwargs)
751
+
752
+
753
+ @DATASETS.register_module()
754
+ class MotionVerseMPI(SingleMotionVerseDataset):
755
+
756
+ def __init__(self, **kwargs):
757
+ if 'dataset_path' not in kwargs:
758
+ kwargs['dataset_path'] = 'mpi'
759
+ task_name = kwargs['task_name']
760
+ assert task_name in ['mocap', 'v2m']
761
+ super().__init__(**kwargs)
762
+
763
+
764
+ @DATASETS.register_module()
765
+ class MotionVersePW3D(SingleMotionVerseDataset):
766
+
767
+ def __init__(self, **kwargs):
768
+ if 'dataset_path' not in kwargs:
769
+ kwargs['dataset_path'] = '3dpw'
770
+ task_name = kwargs['task_name']
771
+ assert task_name in ['mocap', 'v2m']
772
+ super().__init__(**kwargs)
773
+
774
+
775
+ @DATASETS.register_module()
776
+ class MotionVerseAIST(SingleMotionVerseDataset):
777
+
778
+ def __init__(self, **kwargs):
779
+ if 'dataset_path' not in kwargs:
780
+ kwargs['dataset_path'] = 'aist'
781
+ task_name = kwargs['task_name']
782
+ assert task_name in ['mocap', 'm2d']
783
+ super().__init__(**kwargs)
784
+
785
+
786
+ @DATASETS.register_module()
787
+ class MotionVerseBEAT(SingleMotionVerseDataset):
788
+
789
+ def __init__(self, **kwargs):
790
+ if 'dataset_path' not in kwargs:
791
+ kwargs['dataset_path'] = 'beat'
792
+ task_name = kwargs['task_name']
793
+ assert task_name in ['mocap', 's2g']
794
+ super().__init__(**kwargs)
795
+
796
+
797
+ @DATASETS.register_module()
798
+ class MotionVerseTEDG(SingleMotionVerseDataset):
799
+
800
+ def __init__(self, **kwargs):
801
+ if 'dataset_path' not in kwargs:
802
+ kwargs['dataset_path'] = 'tedg'
803
+ task_name = kwargs['task_name']
804
+ assert task_name in ['mocap', 's2g']
805
+ super().__init__(**kwargs)
806
+
807
+
808
+ @DATASETS.register_module()
809
+ class MotionVerseTEDEx(SingleMotionVerseDataset):
810
+
811
+ def __init__(self, **kwargs):
812
+ if 'dataset_path' not in kwargs:
813
+ kwargs['dataset_path'] = 'tedex'
814
+ task_name = kwargs['task_name']
815
+ assert task_name in ['mocap', 's2g']
816
+ super().__init__(**kwargs)
817
+
818
+
819
+ @DATASETS.register_module()
820
+ class MotionVerseS2G3D(SingleMotionVerseDataset):
821
+
822
+ def __init__(self, **kwargs):
823
+ if 'dataset_path' not in kwargs:
824
+ kwargs['dataset_path'] = 's2g3d'
825
+ task_name = kwargs['task_name']
826
+ assert task_name in ['mocap', 's2g']
827
+ super().__init__(**kwargs)
828
+
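The MotionVerse wrapper above assigns every sample the weight partitions[d] / len(dataset_d), so each sub-dataset contributes in proportion to its partition regardless of its size; these weights can be fed to the WeightedRandomSampler imported at the top of the file. A short sketch of that weighting rule with toy numbers (sizes and partitions are made up):

import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler

dataset_sizes = [1000, 250, 50]   # toy stand-ins for the concatenated datasets
partitions = [1.0, 0.5, 0.5]      # relative sampling budget per dataset

# Same rule as MotionVerse: each sample of dataset d gets weight partitions[d] / len(dataset_d).
weights = np.concatenate(
    [np.ones(n) * p / n for p, n in zip(partitions, dataset_sizes)])

sampler = WeightedRandomSampler(
    torch.as_tensor(weights, dtype=torch.double),
    num_samples=2000, replacement=True)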
mogen/datasets/paramUtil.py ADDED
@@ -0,0 +1,140 @@
1
+ # coding=utf-8
2
+ # Copyright 2022 The IDEA Authors (Shunlin Lu and Ling-Hao Chen). All rights reserved.
3
+ #
4
+ # For all the datasets, be sure to read and follow their license agreements,
5
+ # and cite them accordingly.
6
+ # If the unifier is used in your research, please consider to cite as:
7
+ #
8
+ # @article{humantomato,
9
+ # title={HumanTOMATO: Text-aligned Whole-body Motion Generation},
10
+ # author={Lu, Shunlin and Chen, Ling-Hao and Zeng, Ailing and Lin, Jing and Zhang, Ruimao and Zhang, Lei and Shum, Heung-Yeung},
11
+ # journal={arxiv:2310.12978},
12
+ # year={2023}
13
+ # }
14
+ #
15
+ # @InProceedings{Guo_2022_CVPR,
16
+ # author = {Guo, Chuan and Zou, Shihao and Zuo, Xinxin and Wang, Sen and Ji, Wei and Li, Xingyu and Cheng, Li},
17
+ # title = {Generating Diverse and Natural 3D Human Motions From Text},
18
+ # booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
19
+ # month = {June},
20
+ # year = {2022},
21
+ # pages = {5152-5161}
22
+ # }
23
+ #
24
+ # Licensed under the IDEA License, Version 2.0 (the "License");
25
+ # you may not use this file except in compliance with the License.
26
+ # You may obtain a copy of the License at
27
+ #
28
+ # https://github.com/IDEA-Research/HumanTOMATO/blob/main/LICENSE
29
+ #
30
+ # Unless required by applicable law or agreed to in writing, software
31
+ # distributed under the License is distributed on an "AS IS" BASIS,
32
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33
+ # See the License for the specific language governing permissions and
34
+ # limitations under the License. We provide a license to use the code,
35
+ # please read the specific details carefully.
36
+ #
37
+ # ------------------------------------------------------------------------------------------------
38
+ # Copyright (c) Chuan Guo.
39
+ # ------------------------------------------------------------------------------------------------
40
+ # Portions of this code were adapted from the following open-source project:
41
+ # https://github.com/EricGuo5513/HumanML3D
42
+ # ------------------------------------------------------------------------------------------------
43
+
44
+
45
+ import numpy as np
46
+
47
+ # Define a kinematic tree for the skeletal structure
48
+ kit_kinematic_chain = [[0, 11, 12, 13, 14, 15], [0, 16, 17, 18, 19, 20], [0, 1, 2, 3, 4], [3, 5, 6, 7], [3, 8, 9, 10]]
49
+
50
+ kit_raw_offsets = np.array(
51
+ [
52
+ [0, 0, 0],
53
+ [0, 1, 0],
54
+ [0, 1, 0],
55
+ [0, 1, 0],
56
+ [0, 1, 0],
57
+ [1, 0, 0],
58
+ [0, -1, 0],
59
+ [0, -1, 0],
60
+ [-1, 0, 0],
61
+ [0, -1, 0],
62
+ [0, -1, 0],
63
+ [1, 0, 0],
64
+ [0, -1, 0],
65
+ [0, -1, 0],
66
+ [0, 0, 1],
67
+ [0, 0, 1],
68
+ [-1, 0, 0],
69
+ [0, -1, 0],
70
+ [0, -1, 0],
71
+ [0, 0, 1],
72
+ [0, 0, 1]
73
+ ]
74
+ )
75
+
76
+ t2m_raw_body_offsets = np.array([[0,0,0],
77
+ [1,0,0],
78
+ [-1,0,0],
79
+ [0,1,0],
80
+ [0,-1,0],
81
+ [0,-1,0],
82
+ [0,1,0],
83
+ [0,-1,0],
84
+ [0,-1,0],
85
+ [0,1,0],
86
+ [0,0,1],
87
+ [0,0,1],
88
+ [0,1,0],
89
+ [1,0,0],
90
+ [-1,0,0],
91
+ [0,0,1],
92
+ [0,-1,0],
93
+ [0,-1,0],
94
+ [0,-1,0],
95
+ [0,-1,0],
96
+ [0,-1,0],
97
+ [0,-1,0]])
98
+
99
+ t2m_raw_hand_offsets = np.array([[1, 0, 0], # left_index1
100
+ [1, 0, 0], # left_index2
101
+ [1, 0, 0], # left_index3
102
+ [1, 0, 0], # left_middle1
103
+ [1, 0, 0], # left_middle2
104
+ [1, 0, 0], # left_middle3
105
+ [1, 0, 0], # left_pinky1
106
+ [1, 0, 0], # left_pinky2
107
+ [1, 0, 0], # left_pinky3
108
+ [1, 0, 0], # left_ring1
109
+ [1, 0, 0], # left_ring2
110
+ [1, 0, 0], # left_ring3
111
+ [1, 0, 0], # left_thumb1
112
+ [1, 0, 0], # left_thumb2
113
+ [1, 0, 0], # left_thumb3
114
+ [-1, 0, 0], # right_index1
115
+ [-1, 0, 0], # right_index2
116
+ [-1, 0, 0], # right_index3
117
+ [-1, 0, 0], # right_middle1
118
+ [-1, 0, 0], # right_middle2
119
+ [-1, 0, 0], # right_middle3
120
+ [-1, 0, 0], # right_pinky1
121
+ [-1, 0, 0], # right_pinky2
122
+ [-1, 0, 0], # right_pinky3
123
+ [-1, 0, 0], # right_ring1
124
+ [-1, 0, 0], # right_ring2
125
+ [-1, 0, 0], # right_ring3
126
+ [-1, 0, 0], # right_thumb1
127
+ [-1, 0, 0], # right_thumb2
128
+ [-1, 0, 0],]) # right_thumb3
129
+
130
+ t2m_raw_offsets = np.concatenate(
131
+ (t2m_raw_body_offsets, t2m_raw_hand_offsets), axis=0)
132
+ t2m_kinematic_chain = [[0, 2, 5, 8, 11], [0, 1, 4, 7, 10], [0, 3, 6, 9, 12, 15], [9, 14, 17, 19, 21], [9, 13, 16, 18, 20]]
133
+ t2m_left_hand_chain = [[20, 22, 23, 24], [20, 34, 35, 36], [20, 25, 26, 27], [20, 31, 32, 33], [20, 28, 29, 30]]
134
+ t2m_right_hand_chain = [[21, 43, 44, 45], [21, 46, 47, 48], [21, 40, 41, 42], [21, 37, 38, 39], [21, 49, 50, 51]]
135
+
136
+ t2m_body_hand_kinematic_chain = t2m_kinematic_chain + t2m_left_hand_chain + t2m_right_hand_chain
137
+
138
+ kit_tgt_skel_id = '03950'
139
+
140
+ t2m_tgt_skel_id = '000021'
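The kinematic chains above list joint indices along each limb; they are typically expanded into (parent, child) bone pairs when drawing or retargeting a skeleton. A short illustrative helper (not part of the repo):

def chains_to_bones(kinematic_chains):
    # Expand each chain [j0, j1, j2, ...] into consecutive (parent, child) pairs.
    bones = []
    for chain in kinematic_chains:
        bones.extend(zip(chain[:-1], chain[1:]))
    return bones

# e.g. the 22-joint body skeleton above yields 21 bones:
# bones = chains_to_bones(t2m_kinematic_chain)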
mogen/datasets/pipelines/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ from .compose import Compose
2
+ from .formatting import (
3
+ Collect,
4
+ ToTensor,
5
+ Transpose,
6
+ WrapFieldsToLists,
7
+ to_tensor
8
+ )
9
+ from .siamese_motion import ProcessSiameseMotion, SwapSiameseMotion
10
+ from .transforms import Crop, Normalize, RandomCrop
11
+ from .motionverse import (
12
+ LoadMotion,
13
+ RetargetSkeleton,
14
+ MotionDownsample,
15
+ PutOnFloor,
16
+ MoveToOrigin,
17
+ RotateToZ,
18
+ KeypointsToTomato,
19
+ RandomCropKeypoints,
20
+ MaskedCrop,
21
+ MaskedRandomCrop
22
+ )
23
+
24
+ __all__ = [
25
+ 'Compose', 'to_tensor', 'Transpose', 'Collect', 'WrapFieldsToLists',
26
+ 'ToTensor', 'Crop', 'RandomCrop', 'Normalize', 'SwapSiameseMotion',
27
+ 'ProcessSiameseMotion', 'LoadMotion', 'RetargetSkeleton', 'MotionDownsample',
28
+ 'PutOnFloor', 'MoveToOrigin', 'RotateToZ', 'KeypointsToTomato', 'RandomCropKeypoints',
29
+ 'MaskedCrop', 'MaskedRandomCrop'
30
+ ]
mogen/datasets/pipelines/compose.py ADDED
@@ -0,0 +1,42 @@
1
+ from collections.abc import Sequence
2
+
3
+ from mmcv.utils import build_from_cfg
4
+
5
+ from ..builder import PIPELINES
6
+
7
+
8
+ @PIPELINES.register_module()
9
+ class Compose(object):
10
+ """Compose a data pipeline with a sequence of transforms.
11
+
12
+ Args:
13
+ transforms (list[dict | callable]):
14
+ Either config dicts of transforms or transform objects.
15
+ """
16
+
17
+ def __init__(self, transforms):
18
+ assert isinstance(transforms, Sequence)
19
+ self.transforms = []
20
+ for transform in transforms:
21
+ if isinstance(transform, dict):
22
+ transform = build_from_cfg(transform, PIPELINES)
23
+ self.transforms.append(transform)
24
+ elif callable(transform):
25
+ self.transforms.append(transform)
26
+ else:
27
+ raise TypeError('transform must be callable or a dict, but got'
28
+ f' {type(transform)}')
29
+
30
+ def __call__(self, data):
31
+ for t in self.transforms:
32
+ data = t(data)
33
+ if data is None:
34
+ return None
35
+ return data
36
+
37
+ def __repr__(self):
38
+ format_string = self.__class__.__name__ + '('
39
+ for t in self.transforms:
40
+ format_string += f'\n {t}'
41
+ format_string += '\n)'
42
+ return format_string
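Compose accepts either PIPELINES config dicts (built via build_from_cfg) or plain callables, and any transform that returns None aborts the pipeline for that sample. A minimal usage sketch, assuming the mogen package and mmcv are importable; the two transforms are made up for illustration:

from mogen.datasets.pipelines import Compose

def add_length(results):
    results['motion_length'] = len(results['motion'])
    return results

def drop_short(results):
    # Returning None short-circuits Compose.__call__ for this sample.
    return results if results['motion_length'] >= 8 else None

pipeline = Compose([add_length, drop_short])
print(pipeline({'motion': list(range(10))}))   # transformed dict
print(pipeline({'motion': list(range(4))}))    # None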
mogen/datasets/pipelines/formatting.py ADDED
@@ -0,0 +1,135 @@
1
+ from collections.abc import Sequence
2
+
3
+ import mmcv
4
+ import numpy as np
5
+ import torch
6
+ from mmcv.parallel import DataContainer as DC
7
+
8
+ from ..builder import PIPELINES
9
+
10
+
11
+ def to_tensor(data):
12
+ """Convert objects of various python types to :obj:`torch.Tensor`.
13
+
14
+ Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
15
+ :class:`Sequence`, :class:`int` and :class:`float`.
16
+ """
17
+ if isinstance(data, torch.Tensor):
18
+ return data
19
+ elif isinstance(data, np.ndarray):
20
+ return torch.from_numpy(data)
21
+ elif isinstance(data, Sequence) and not mmcv.is_str(data):
22
+ return torch.tensor(data)
23
+ elif isinstance(data, int):
24
+ return torch.LongTensor([data])
25
+ elif isinstance(data, float):
26
+ return torch.FloatTensor([data])
27
+ else:
28
+ raise TypeError(
29
+ f'Type {type(data)} cannot be converted to tensor. '
30
+ 'Supported types are: `numpy.ndarray`, `torch.Tensor`, '
31
+ '`Sequence`, `int` and `float`')
32
+
33
+
34
+ @PIPELINES.register_module()
35
+ class ToTensor(object):
36
+
37
+ def __init__(self, keys):
38
+ self.keys = keys
39
+
40
+ def __call__(self, results):
41
+ for key in self.keys:
42
+ results[key] = to_tensor(results[key])
43
+ return results
44
+
45
+ def __repr__(self):
46
+ return self.__class__.__name__ + f'(keys={self.keys})'
47
+
48
+
49
+ @PIPELINES.register_module()
50
+ class Transpose(object):
51
+
52
+ def __init__(self, keys, order):
53
+ self.keys = keys
54
+ self.order = order
55
+
56
+ def __call__(self, results):
57
+ for key in self.keys:
58
+ results[key] = results[key].transpose(self.order)
59
+ return results
60
+
61
+ def __repr__(self):
62
+ return self.__class__.__name__ + \
63
+ f'(keys={self.keys}, order={self.order})'
64
+
65
+
66
+ @PIPELINES.register_module()
67
+ class Collect(object):
68
+ """Collect data from the loader relevant to the specific task.
69
+
70
+ This is usually the last stage of the data loader pipeline.
71
+
72
+ Args:
73
+ keys (Sequence[str]): Keys of results to be collected in ``data``.
74
+ meta_keys (Sequence[str], optional): Meta keys to be converted to
75
+ ``mmcv.DataContainer`` and collected in ``data[motion_metas]``.
76
+ Default: ``('filename', 'ori_filename',
77
+ 'ori_shape', 'motion_shape', 'motion_mask')``
78
+
79
+ Returns:
80
+ dict: The result dict contains the following keys
81
+ - keys in``self.keys``
82
+ - ``motion_metas`` if available
83
+ """
84
+
85
+ def __init__(self,
86
+ keys,
87
+ meta_keys=('filename', 'ori_filename', 'ori_shape',
88
+ 'motion_shape', 'motion_mask')):
89
+ self.keys = keys
90
+ self.meta_keys = meta_keys
91
+
92
+ def __call__(self, results):
93
+ data = {}
94
+ motion_meta = {}
95
+ for key in self.meta_keys:
96
+ if key in results:
97
+ motion_meta[key] = results[key]
98
+ data['motion_metas'] = DC(motion_meta, cpu_only=True)
99
+ for key in self.keys:
100
+ data[key] = results[key]
101
+ return data
102
+
103
+ def __repr__(self):
104
+ return self.__class__.__name__ + \
105
+ f'(keys={self.keys}, meta_keys={self.meta_keys})'
106
+
107
+
108
+ @PIPELINES.register_module()
109
+ class WrapFieldsToLists(object):
110
+ """Wrap fields of the data dictionary into lists for evaluation.
111
+
112
+ This class can be used as a last step of a test or validation
113
+ pipeline for single image evaluation or inference.
114
+
115
+ Example:
116
+ >>> test_pipeline = [
117
+ >>> dict(type='LoadImageFromFile'),
118
+ >>> dict(type='Normalize',
119
+ mean=[123.675, 116.28, 103.53],
120
+ std=[58.395, 57.12, 57.375],
121
+ to_rgb=True),
122
+ >>> dict(type='ImageToTensor', keys=['img']),
123
+ >>> dict(type='Collect', keys=['img']),
124
+ >>> dict(type='WrapIntoLists')
125
+ >>> ]
126
+ """
127
+
128
+ def __call__(self, results):
129
+ # Wrap dict fields into lists
130
+ for key, val in results.items():
131
+ results[key] = [val]
132
+ return results
133
+
134
+ def __repr__(self):
135
+ return f'{self.__class__.__name__}()'
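ToTensor converts the selected keys to torch tensors, and Collect moves the listed meta keys into an mmcv DataContainer under 'motion_metas'. A hedged end-to-end sketch (assumes mmcv is installed; the keys and array shapes below are arbitrary examples):

import numpy as np
from mogen.datasets.pipelines import Compose

pipeline = Compose([
    dict(type='ToTensor', keys=['motion', 'motion_mask']),
    dict(type='Collect', keys=['motion', 'motion_mask'],
         meta_keys=('motion_shape',)),
])
data = pipeline({
    'motion': np.zeros((196, 263), dtype=np.float32),
    'motion_mask': np.ones(196, dtype=np.float32),
    'motion_shape': (196, 263),
})
# data['motion'] and data['motion_mask'] are torch tensors;
# data['motion_metas'] is a DataContainer holding 'motion_shape'.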