show / mmpose-0.29.0 /tests /test_pipelines /test_pose3d_transform.py
camenduru's picture
thanks to show ❤
3bbb319
raw
history blame
No virus
16.2 kB
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os.path as osp
import tempfile
import mmcv
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal
from mmpose.core import SimpleCamera
from mmpose.datasets.pipelines import Compose
H36M_JOINT_IDX = [14, 2, 1, 0, 3, 4, 5, 16, 12, 17, 18, 9, 10, 11, 8, 7, 6]
def get_data_sample():
def _parse_h36m_imgname(imgname):
"""Parse imgname to get information of subject, action and camera.
A typical h36m image filename is like:
S1_Directions_1.54138969_000001.jpg
"""
subj, rest = osp.basename(imgname).split('_', 1)
action, rest = rest.split('.', 1)
camera, rest = rest.split('_', 1)
return subj, action, camera
ann_flle = 'tests/data/h36m/test_h36m.npz'
camera_param_file = 'tests/data/h36m/cameras.pkl'
data = np.load(ann_flle)
cameras = mmcv.load(camera_param_file)
_imgnames = data['imgname']
_joints_2d = data['part'][:, H36M_JOINT_IDX].astype(np.float32)
_joints_3d = data['S'][:, H36M_JOINT_IDX].astype(np.float32)
_centers = data['center'].astype(np.float32)
_scales = data['scale'].astype(np.float32)
frame_ids = [0]
target_frame_id = 0
results = {
'frame_ids': frame_ids,
'target_frame_id': target_frame_id,
'input_2d': _joints_2d[frame_ids, :, :2],
'input_2d_visible': _joints_2d[frame_ids, :, -1:],
'input_3d': _joints_3d[frame_ids, :, :3],
'input_3d_visible': _joints_3d[frame_ids, :, -1:],
'target': _joints_3d[target_frame_id, :, :3],
'target_visible': _joints_3d[target_frame_id, :, -1:],
'imgnames': _imgnames[frame_ids],
'scales': _scales[frame_ids],
'centers': _centers[frame_ids],
}
# add camera parameters
subj, _, camera = _parse_h36m_imgname(_imgnames[frame_ids[0]])
results['camera_param'] = cameras[(subj, camera)]
# add image size
results['image_width'] = results['camera_param']['w']
results['image_height'] = results['camera_param']['h']
# add ann_info
ann_info = {}
ann_info['num_joints'] = 17
ann_info['joint_weights'] = np.full(17, 1.0, dtype=np.float32)
ann_info['flip_pairs'] = [[1, 4], [2, 5], [3, 6], [11, 14], [12, 15],
[13, 16]]
ann_info['upper_body_ids'] = (0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
ann_info['lower_body_ids'] = (1, 2, 3, 4, 5, 6)
ann_info['use_different_joint_weights'] = False
results['ann_info'] = ann_info
return results
def test_joint_transforms():
results = get_data_sample()
mean = np.random.rand(16, 3).astype(np.float32)
std = np.random.rand(16, 3).astype(np.float32) + 1e-6
pipeline = [
dict(
type='RelativeJointRandomFlip',
item='target',
flip_cfg=dict(center_mode='root', center_index=0),
visible_item='target_visible',
flip_prob=1.,
flip_camera=True),
dict(
type='GetRootCenteredPose',
item='target',
root_index=0,
root_name='global_position',
remove_root=True),
dict(
type='NormalizeJointCoordinate', item='target', mean=mean,
std=std),
dict(type='PoseSequenceToTensor', item='target'),
dict(
type='ImageCoordinateNormalization',
item='input_2d',
norm_camera=True),
dict(type='CollectCameraIntrinsics'),
dict(
type='Collect',
keys=[('input_2d', 'input'), ('target', 'output'), 'flip_pairs',
'intrinsics'],
meta_name='metas',
meta_keys=['camera_param'])
]
pipeline = Compose(pipeline)
output = pipeline(copy.deepcopy(results))
# test transformation of target
joints_0 = results['target']
joints_1 = output['output'].numpy()
# manually do transformations
flip_pairs = output['flip_pairs']
_joints_0_flipped = joints_0.copy()
for _l, _r in flip_pairs:
_joints_0_flipped[..., _l, :] = joints_0[..., _r, :]
_joints_0_flipped[..., _r, :] = joints_0[..., _l, :]
_joints_0_flipped[...,
0] = 2 * joints_0[..., 0:1, 0] - _joints_0_flipped[...,
0]
joints_0 = _joints_0_flipped
joints_0 = (joints_0[..., 1:, :] - joints_0[..., 0:1, :] - mean) / std
# convert to [K*C, T]
joints_0 = joints_0.reshape(-1)[..., None]
np.testing.assert_array_almost_equal(joints_0, joints_1)
# test transformation of input
joints_0 = results['input_2d']
joints_1 = output['input']
# manually do transformations
center = np.array(
[0.5 * results['image_width'], 0.5 * results['image_height']],
dtype=np.float32)
scale = np.array(0.5 * results['image_width'], dtype=np.float32)
joints_0 = (joints_0 - center) / scale
np.testing.assert_array_almost_equal(joints_0, joints_1)
# test transformation of camera parameters
camera_param_0 = results['camera_param']
camera_param_1 = output['metas'].data['camera_param']
# manually flip and normalization
camera_param_0['c'][0] *= -1
camera_param_0['p'][0] *= -1
camera_param_0['c'] = (camera_param_0['c'] -
np.array(center)[:, None]) / scale
camera_param_0['f'] = camera_param_0['f'] / scale
np.testing.assert_array_almost_equal(camera_param_0['c'],
camera_param_1['c'])
np.testing.assert_array_almost_equal(camera_param_0['f'],
camera_param_1['f'])
# test CollectCameraIntrinsics
intrinsics_0 = np.concatenate([
results['camera_param']['f'].reshape(2),
results['camera_param']['c'].reshape(2),
results['camera_param']['k'].reshape(3),
results['camera_param']['p'].reshape(2)
])
intrinsics_1 = output['intrinsics']
np.testing.assert_array_almost_equal(intrinsics_0, intrinsics_1)
# test load mean/std from file
with tempfile.TemporaryDirectory() as tmpdir:
norm_param = {'mean': mean, 'std': std}
norm_param_file = osp.join(tmpdir, 'norm_param.pkl')
mmcv.dump(norm_param, norm_param_file)
pipeline = [
dict(
type='NormalizeJointCoordinate',
item='target',
norm_param_file=norm_param_file),
]
pipeline = Compose(pipeline)
def test_camera_projection():
results = get_data_sample()
pipeline_1 = [
dict(
type='CameraProjection',
item='input_3d',
output_name='input_3d_w',
camera_type='SimpleCamera',
mode='camera_to_world'),
dict(
type='CameraProjection',
item='input_3d_w',
output_name='input_3d_wp',
camera_type='SimpleCamera',
mode='world_to_pixel'),
dict(
type='CameraProjection',
item='input_3d',
output_name='input_3d_p',
camera_type='SimpleCamera',
mode='camera_to_pixel'),
dict(type='Collect', keys=['input_3d_wp', 'input_3d_p'], meta_keys=[])
]
camera_param = results['camera_param'].copy()
camera_param['K'] = np.concatenate(
(np.diagflat(camera_param['f']), camera_param['c']), axis=-1)
pipeline_2 = [
dict(
type='CameraProjection',
item='input_3d',
output_name='input_3d_w',
camera_type='SimpleCamera',
camera_param=camera_param,
mode='camera_to_world'),
dict(
type='CameraProjection',
item='input_3d_w',
output_name='input_3d_wp',
camera_type='SimpleCamera',
camera_param=camera_param,
mode='world_to_pixel'),
dict(
type='CameraProjection',
item='input_3d',
output_name='input_3d_p',
camera_type='SimpleCamera',
camera_param=camera_param,
mode='camera_to_pixel'),
dict(
type='CameraProjection',
item='input_3d_w',
output_name='input_3d_wc',
camera_type='SimpleCamera',
camera_param=camera_param,
mode='world_to_camera'),
dict(
type='Collect',
keys=['input_3d_wp', 'input_3d_p', 'input_2d'],
meta_keys=[])
]
output1 = Compose(pipeline_1)(results)
output2 = Compose(pipeline_2)(results)
np.testing.assert_allclose(
output1['input_3d_wp'], output1['input_3d_p'], rtol=1e-6)
np.testing.assert_allclose(
output2['input_3d_wp'], output2['input_3d_p'], rtol=1e-6)
np.testing.assert_allclose(
output2['input_3d_p'], output2['input_2d'], rtol=1e-3, atol=1e-1)
# test invalid camera parameters
with pytest.raises(ValueError):
# missing intrinsic parameters
camera_param_wo_intrinsic = camera_param.copy()
camera_param_wo_intrinsic.pop('K')
camera_param_wo_intrinsic.pop('f')
camera_param_wo_intrinsic.pop('c')
_ = Compose([
dict(
type='CameraProjection',
item='input_3d',
camera_type='SimpleCamera',
camera_param=camera_param_wo_intrinsic,
mode='camera_to_pixel')
])
with pytest.raises(ValueError):
# invalid mode
_ = Compose([
dict(
type='CameraProjection',
item='input_3d',
camera_type='SimpleCamera',
camera_param=camera_param,
mode='dummy')
])
# test camera without undistortion
camera_param_wo_undistortion = camera_param.copy()
camera_param_wo_undistortion.pop('k')
camera_param_wo_undistortion.pop('p')
_ = Compose([
dict(
type='CameraProjection',
item='input_3d',
camera_type='SimpleCamera',
camera_param=camera_param_wo_undistortion,
mode='camera_to_pixel')
])
# test pixel to camera transformation
camera = SimpleCamera(camera_param_wo_undistortion)
kpt_camera = np.random.rand(14, 3)
kpt_pixel = camera.camera_to_pixel(kpt_camera)
_kpt_camera = camera.pixel_to_camera(
np.concatenate([kpt_pixel, kpt_camera[:, [2]]], -1))
assert_array_almost_equal(_kpt_camera, kpt_camera, decimal=4)
def test_3d_heatmap_generation():
ann_info = dict(
image_size=np.array([256, 256]),
heatmap_size=np.array([64, 64, 64]),
heatmap3d_depth_bound=400.0,
num_joints=17,
joint_weights=np.ones((17, 1), dtype=np.float32),
use_different_joint_weights=False)
results = dict(
joints_3d=np.zeros([17, 3]),
joints_3d_visible=np.ones([17, 3]),
ann_info=ann_info)
pipeline = Compose([dict(type='Generate3DHeatmapTarget')])
results_3d = pipeline(results)
assert results_3d['target'].shape == (17, 64, 64, 64)
assert results_3d['target_weight'].shape == (17, 1)
# test joint_indices
pipeline = Compose(
[dict(type='Generate3DHeatmapTarget', joint_indices=[0, 8, 16])])
results_3d = pipeline(results)
assert results_3d['target'].shape == (3, 64, 64, 64)
assert results_3d['target_weight'].shape == (3, 1)
def test_voxel3D_heatmap_generation():
heatmap_size = [200, 160]
cube_size = [8, 8, 2]
ann_info = dict(
image_size=np.array([800, 640]),
heatmap_size=np.array([heatmap_size]),
num_joints=17,
num_scales=1,
space_size=[12000.0, 12000.0, 2000.0],
space_center=[3000.0, 4500.0, 1000.0],
cube_size=cube_size)
results = dict(
joints_3d=np.ones([2, 17, 3]),
joints_3d_visible=np.ones([2, 17, 3]),
ann_info=ann_info)
# test single joint index
joint_indices = [[11, 12]]
pipeline = Compose([
dict(
type='GenerateVoxel3DHeatmapTarget',
sigma=200.0,
joint_indices=joint_indices,
),
])
results_ = pipeline(results)
assert results_['targets_3d'].shape == (8, 8, 2)
# test multiple joint indices
joint_indices = [0, 8, 6]
pipeline = Compose([
dict(
type='GenerateVoxel3DHeatmapTarget',
sigma=200.0,
joint_indices=joint_indices,
),
])
results_ = pipeline(results)
assert results_['targets_3d'].shape == (3, 8, 8, 2)
def test_input_heatmap_generation():
heatmap_size = [200, 160]
ann_info = dict(
image_size=np.array([800, 640]),
heatmap_size=np.array([heatmap_size]),
num_joints=17,
num_scales=1,
)
results = dict(
joints=np.zeros([2, 17, 3]),
joints_visible=np.ones([2, 17, 3]),
ann_info=ann_info)
pipeline = dict(
type='GenerateInputHeatmaps',
item='joints',
visible_item='joints_visible',
obscured=0.0,
from_pred=False,
sigma=3,
scale=1.0,
base_size=96,
target_type='gaussian',
heatmap_cfg=dict(
base_scale=0.9,
offset=0.03,
threshold=0.6,
extra=[
dict(joint_ids=[7, 8], scale_factor=0.5, threshold=0.1),
dict(
joint_ids=[9, 10],
scale_factor=0.2,
threshold=0.1,
),
dict(
joint_ids=[0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16],
scale_factor=0.5,
threshold=0.05)
]))
pipelines = Compose([pipeline])
results_ = pipelines(results)
assert results_['input_heatmaps'][0].shape == (17, heatmap_size[1],
heatmap_size[0])
# test `obscured`
pipeline_copy = copy.deepcopy(pipeline)
pipeline_copy['obscured'] = 0.5
pipelines = Compose([pipeline])
results_ = pipelines(results)
assert results_['input_heatmaps'][0].shape == (17, heatmap_size[1],
heatmap_size[0])
# test `heatmap_cfg`
pipeline_copy = copy.deepcopy(pipeline)
pipeline_copy['heatmap_cfg'] = None
pipelines = Compose([pipeline])
results_ = pipelines(results)
assert results_['input_heatmaps'][0].shape == (17, heatmap_size[1],
heatmap_size[0])
# test `from_pred`
pipeline_copy = copy.deepcopy(pipeline)
pipeline_copy['from_pred'] = True
pipelines = Compose([pipeline])
results_ = pipelines(results)
assert results_['input_heatmaps'][0].shape == (17, heatmap_size[1],
heatmap_size[0])
# test `from_pred` & `scale`
pipeline_copy = copy.deepcopy(pipeline)
pipeline_copy['from_pred'] = True
pipeline_copy['scale'] = None
pipelines = Compose([pipeline])
results_ = pipelines(results)
assert results_['input_heatmaps'][0].shape == (17, heatmap_size[1],
heatmap_size[0])
def test_affine_joints():
ann_info = dict(image_size=np.array([800, 640]))
results = dict(
center=np.array([180, 144]),
scale=np.array([360, 288], dtype=np.float32),
rotation=0.0,
joints=np.ones((3, 17, 2)),
joints_visible=np.ones((3, 17, 2)),
ann_info=ann_info)
pipeline = Compose([
dict(
type='AffineJoints', item='joints', visible_item='joints_visible')
])
results_ = pipeline(results)
assert results_['joints'].shape == (3, 17, 2)
assert results_['joints_visible'].shape == (3, 17, 2)
# test `joints_visible` is zero
results_copy = copy.deepcopy(results)
results_copy['joints_visible'] = np.zeros((3, 17, 2))
results_ = pipeline(results)
assert results_['joints'].shape == (3, 17, 2)
assert results_['joints_visible'].shape == (3, 17, 2)