|
|
|
import copy |
|
import os.path as osp |
|
import tempfile |
|
|
|
import mmcv |
|
import numpy as np |
|
import pytest |
|
from numpy.testing import assert_array_almost_equal |
|
|
|
from mmpose.core import SimpleCamera |
|
from mmpose.datasets.pipelines import Compose |
|
|
|
# Indices applied to the joint axis of the raw annotation arrays ('part' and
# 'S') to select the 17 joints, in the order used by the samples built in
# this module.
H36M_JOINT_IDX = [14, 2, 1, 0, 3, 4, 5, 16, 12, 17, 18, 9, 10, 11, 8, 7, 6]
|
|
|
|
|
def get_data_sample():
    """Build a single-frame H36M sample dict for the pipeline tests.

    Annotations are read from ``tests/data/h36m/test_h36m.npz`` and camera
    parameters from ``tests/data/h36m/cameras.pkl``. Only frame 0 is used,
    both as the input sequence and as the target frame.

    Returns:
        dict: Sample with the keys ``frame_ids``, ``target_frame_id``,
        ``input_2d``, ``input_2d_visible``, ``input_3d``,
        ``input_3d_visible``, ``target``, ``target_visible``, ``imgnames``,
        ``scales``, ``centers``, ``camera_param``, ``image_width``,
        ``image_height`` and ``ann_info``.
    """

    def _parse_h36m_imgname(imgname):
        """Parse imgname to get information of subject, action and camera.

        A typical h36m image filename is like:
        S1_Directions_1.54138969_000001.jpg
        """
        subj, rest = osp.basename(imgname).split('_', 1)
        action, rest = rest.split('.', 1)
        camera, _ = rest.split('_', 1)
        return subj, action, camera

    ann_file = 'tests/data/h36m/test_h36m.npz'
    camera_param_file = 'tests/data/h36m/cameras.pkl'

    # np.load on an .npz returns a lazy archive object that holds the file
    # open; read everything we need inside the ``with`` so it gets closed.
    with np.load(ann_file) as data:
        _imgnames = data['imgname']
        _joints_2d = data['part'][:, H36M_JOINT_IDX].astype(np.float32)
        _joints_3d = data['S'][:, H36M_JOINT_IDX].astype(np.float32)
        _centers = data['center'].astype(np.float32)
        _scales = data['scale'].astype(np.float32)
    cameras = mmcv.load(camera_param_file)

    frame_ids = [0]
    target_frame_id = 0

    # The last channel of the 2D/3D joint arrays is used as the visibility
    # flag; the leading channels are the coordinates.
    results = {
        'frame_ids': frame_ids,
        'target_frame_id': target_frame_id,
        'input_2d': _joints_2d[frame_ids, :, :2],
        'input_2d_visible': _joints_2d[frame_ids, :, -1:],
        'input_3d': _joints_3d[frame_ids, :, :3],
        'input_3d_visible': _joints_3d[frame_ids, :, -1:],
        'target': _joints_3d[target_frame_id, :, :3],
        'target_visible': _joints_3d[target_frame_id, :, -1:],
        'imgnames': _imgnames[frame_ids],
        'scales': _scales[frame_ids],
        'centers': _centers[frame_ids],
    }

    # Camera parameters are keyed by (subject, camera) parsed from the
    # image name of the first frame.
    subj, _, camera = _parse_h36m_imgname(_imgnames[frame_ids[0]])
    results['camera_param'] = cameras[(subj, camera)]

    # Image size is taken from the camera parameters.
    results['image_width'] = results['camera_param']['w']
    results['image_height'] = results['camera_param']['h']

    # Dataset-level annotation info consumed by the transforms.
    ann_info = {}
    ann_info['num_joints'] = 17
    ann_info['joint_weights'] = np.full(17, 1.0, dtype=np.float32)
    ann_info['flip_pairs'] = [[1, 4], [2, 5], [3, 6], [11, 14], [12, 15],
                              [13, 16]]
    ann_info['upper_body_ids'] = (0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
    ann_info['lower_body_ids'] = (1, 2, 3, 4, 5, 6)
    ann_info['use_different_joint_weights'] = False

    results['ann_info'] = ann_info

    return results
|
|
|
|
|
def test_joint_transforms():
    """Run the joint transform pipeline and verify each collected output.

    The pipeline output for the 3D target, the 2D input, the camera
    parameters and the collected intrinsics is compared against reference
    values recomputed here with plain NumPy. Finally a
    ``NormalizeJointCoordinate`` step is built from a parameter file.
    """
    sample = get_data_sample()
    assert_close = np.testing.assert_array_almost_equal

    # 16 rows: the 17 joints of the sample minus the removed root joint.
    mean = np.random.rand(16, 3).astype(np.float32)
    std = np.random.rand(16, 3).astype(np.float32) + 1e-6

    flip_step = {
        'type': 'RelativeJointRandomFlip',
        'item': 'target',
        'flip_cfg': {'center_mode': 'root', 'center_index': 0},
        'visible_item': 'target_visible',
        'flip_prob': 1.,
        'flip_camera': True,
    }
    root_center_step = {
        'type': 'GetRootCenteredPose',
        'item': 'target',
        'root_index': 0,
        'root_name': 'global_position',
        'remove_root': True,
    }
    normalize_step = {
        'type': 'NormalizeJointCoordinate',
        'item': 'target',
        'mean': mean,
        'std': std,
    }
    to_tensor_step = {'type': 'PoseSequenceToTensor', 'item': 'target'}
    image_norm_step = {
        'type': 'ImageCoordinateNormalization',
        'item': 'input_2d',
        'norm_camera': True,
    }
    intrinsics_step = {'type': 'CollectCameraIntrinsics'}
    collect_step = {
        'type': 'Collect',
        'keys': [('input_2d', 'input'), ('target', 'output'), 'flip_pairs',
                 'intrinsics'],
        'meta_name': 'metas',
        'meta_keys': ['camera_param'],
    }

    transforms = Compose([
        flip_step, root_center_step, normalize_step, to_tensor_step,
        image_norm_step, intrinsics_step, collect_step
    ])
    # Run on a deep copy so ``sample`` keeps the untransformed values.
    output = transforms(copy.deepcopy(sample))

    # --- 3D target: flip, root-center, normalize, flatten ---------------
    target = sample['target']
    flipped = target.copy()
    for left, right in output['flip_pairs']:
        flipped[..., left, :] = target[..., right, :]
        flipped[..., right, :] = target[..., left, :]
    # Mirror the x coordinate about the x of joint 0.
    flipped[..., 0] = 2 * target[..., 0:1, 0] - flipped[..., 0]
    expected_target = (flipped[..., 1:, :] - flipped[..., 0:1, :] -
                       mean) / std
    expected_target = expected_target.reshape(-1)[..., None]
    assert_close(expected_target, output['output'].numpy())

    # --- 2D input: normalize by image center and half width -------------
    half_width = 0.5 * sample['image_width']
    half_height = 0.5 * sample['image_height']
    center = np.array([half_width, half_height], dtype=np.float32)
    scale = np.array(half_width, dtype=np.float32)
    expected_input = (sample['input_2d'] - center) / scale
    assert_close(expected_input, output['input'])

    # --- camera parameters: flipped and normalized ----------------------
    # NOTE: ``ref_camera`` is the dict stored in ``sample`` and is updated
    # in place; the intrinsics check below relies on these updated values.
    ref_camera = sample['camera_param']
    out_camera = output['metas'].data['camera_param']

    ref_camera['c'][0] *= -1
    ref_camera['p'][0] *= -1
    ref_camera['c'] = (ref_camera['c'] - np.array(center)[:, None]) / scale
    ref_camera['f'] = ref_camera['f'] / scale
    assert_close(ref_camera['c'], out_camera['c'])
    assert_close(ref_camera['f'], out_camera['f'])

    # --- collected intrinsics: [f, c, k, p] concatenated ----------------
    layout = (('f', 2), ('c', 2), ('k', 3), ('p', 2))
    expected_intrinsics = np.concatenate(
        [sample['camera_param'][key].reshape(size) for key, size in layout])
    assert_close(expected_intrinsics, output['intrinsics'])

    # --- normalization parameters loaded from a file --------------------
    with tempfile.TemporaryDirectory() as tmpdir:
        norm_param_file = osp.join(tmpdir, 'norm_param.pkl')
        mmcv.dump({'mean': mean, 'std': std}, norm_param_file)

        Compose([{
            'type': 'NormalizeJointCoordinate',
            'item': 'target',
            'norm_param_file': norm_param_file,
        }])
|
|
|
|
|
def test_camera_projection():
    """Check ``CameraProjection`` consistency and its error handling.

    Two pipelines are run on the same sample: one whose steps are given no
    ``camera_param`` and one whose steps receive it explicitly. In both,
    projecting camera -> world -> pixel must agree with projecting camera
    -> pixel directly, and the explicit variant must also be close to the
    annotated 2D joints. Invalid configurations must raise ``ValueError``.
    """

    def _projection(mode, **kwargs):
        """Return a SimpleCamera-based CameraProjection step config."""
        return dict(
            type='CameraProjection',
            camera_type='SimpleCamera',
            mode=mode,
            **kwargs)

    results = get_data_sample()

    steps_without_param = [
        _projection(
            'camera_to_world', item='input_3d', output_name='input_3d_w'),
        _projection(
            'world_to_pixel', item='input_3d_w', output_name='input_3d_wp'),
        _projection(
            'camera_to_pixel', item='input_3d', output_name='input_3d_p'),
        dict(type='Collect', keys=['input_3d_wp', 'input_3d_p'], meta_keys=[])
    ]

    # Shallow copy with an extra 'K' entry built from 'f' and 'c'.
    camera_param = results['camera_param'].copy()
    focal_diag = np.diagflat(camera_param['f'])
    camera_param['K'] = np.concatenate((focal_diag, camera_param['c']),
                                       axis=-1)

    steps_with_param = [
        _projection(
            'camera_to_world',
            item='input_3d',
            output_name='input_3d_w',
            camera_param=camera_param),
        _projection(
            'world_to_pixel',
            item='input_3d_w',
            output_name='input_3d_wp',
            camera_param=camera_param),
        _projection(
            'camera_to_pixel',
            item='input_3d',
            output_name='input_3d_p',
            camera_param=camera_param),
        _projection(
            'world_to_camera',
            item='input_3d_w',
            output_name='input_3d_wc',
            camera_param=camera_param),
        dict(
            type='Collect',
            keys=['input_3d_wp', 'input_3d_p', 'input_2d'],
            meta_keys=[])
    ]

    output1 = Compose(steps_without_param)(results)
    output2 = Compose(steps_with_param)(results)

    # Going through world coordinates must not change the pixel result.
    for output in (output1, output2):
        np.testing.assert_allclose(
            output['input_3d_wp'], output['input_3d_p'], rtol=1e-6)

    # The projected 3D joints must be close to the annotated 2D joints.
    np.testing.assert_allclose(
        output2['input_3d_p'], output2['input_2d'], rtol=1e-3, atol=1e-1)

    # Camera parameters without any intrinsics ('K', 'f', 'c') are rejected.
    with pytest.raises(ValueError):
        camera_param_wo_intrinsic = camera_param.copy()
        for key in ('K', 'f', 'c'):
            camera_param_wo_intrinsic.pop(key)
        _ = Compose([
            _projection(
                'camera_to_pixel',
                item='input_3d',
                camera_param=camera_param_wo_intrinsic)
        ])

    # An unknown projection mode is rejected.
    with pytest.raises(ValueError):
        _ = Compose([
            _projection('dummy', item='input_3d', camera_param=camera_param)
        ])

    # Camera parameters without the 'k' and 'p' entries are accepted.
    camera_param_wo_undistortion = camera_param.copy()
    for key in ('k', 'p'):
        camera_param_wo_undistortion.pop(key)
    _ = Compose([
        _projection(
            'camera_to_pixel',
            item='input_3d',
            camera_param=camera_param_wo_undistortion)
    ])

    # camera -> pixel -> camera round trip (depth is re-attached to the
    # pixel coordinates before projecting back).
    camera = SimpleCamera(camera_param_wo_undistortion)
    kpt_camera = np.random.rand(14, 3)
    kpt_pixel = camera.camera_to_pixel(kpt_camera)
    pixel_with_depth = np.concatenate([kpt_pixel, kpt_camera[:, [2]]], -1)
    _kpt_camera = camera.pixel_to_camera(pixel_with_depth)
    assert_array_almost_equal(_kpt_camera, kpt_camera, decimal=4)
|
|
|
|
|
def test_3d_heatmap_generation():
    """Check output shapes of ``Generate3DHeatmapTarget``.

    The transform is run once with its default joints and once with an
    explicit ``joint_indices`` subset; the target and target weight must
    have one entry per selected joint.
    """
    results = {
        'joints_3d': np.zeros([17, 3]),
        'joints_3d_visible': np.ones([17, 3]),
        'ann_info': {
            'image_size': np.array([256, 256]),
            'heatmap_size': np.array([64, 64, 64]),
            'heatmap3d_depth_bound': 400.0,
            'num_joints': 17,
            'joint_weights': np.ones((17, 1), dtype=np.float32),
            'use_different_joint_weights': False,
        },
    }

    # (extra transform arguments, expected number of output joints)
    cases = (
        ({}, 17),
        ({'joint_indices': [0, 8, 16]}, 3),
    )
    for extra_args, num_out in cases:
        step = dict(type='Generate3DHeatmapTarget', **extra_args)
        results_3d = Compose([step])(results)
        assert results_3d['target'].shape == (num_out, 64, 64, 64)
        assert results_3d['target_weight'].shape == (num_out, 1)
|
|
|
|
|
def test_voxel3D_heatmap_generation():
    """Check output shapes of ``GenerateVoxel3DHeatmapTarget``.

    With a nested ``joint_indices`` list the target has the cube shape
    only; with a flat list of three indices it gains a leading axis of
    length three.
    """
    heatmap_size = [200, 160]
    cube_size = [8, 8, 2]

    results = {
        'joints_3d': np.ones([2, 17, 3]),
        'joints_3d_visible': np.ones([2, 17, 3]),
        'ann_info': {
            'image_size': np.array([800, 640]),
            'heatmap_size': np.array([heatmap_size]),
            'num_joints': 17,
            'num_scales': 1,
            'space_size': [12000.0, 12000.0, 2000.0],
            'space_center': [3000.0, 4500.0, 1000.0],
            'cube_size': cube_size,
        },
    }

    # (joint_indices argument, expected shape of 'targets_3d')
    cases = (
        ([[11, 12]], tuple(cube_size)),
        ([0, 8, 6], (3, *cube_size)),
    )
    for joint_indices, expected_shape in cases:
        step = {
            'type': 'GenerateVoxel3DHeatmapTarget',
            'sigma': 200.0,
            'joint_indices': joint_indices,
        }
        results_ = Compose([step])(results)
        assert results_['targets_3d'].shape == expected_shape
|
|
|
|
|
def test_input_heatmap_generation():
    """Check the output shape of ``GenerateInputHeatmaps`` for several
    configurations.

    A base configuration is run first, then variants that override
    ``obscured``, ``heatmap_cfg``, ``from_pred`` and ``scale``. Every run
    must produce a first input heatmap of shape
    ``(num_joints, heatmap_height, heatmap_width)``.
    """
    heatmap_size = [200, 160]
    ann_info = dict(
        image_size=np.array([800, 640]),
        heatmap_size=np.array([heatmap_size]),
        num_joints=17,
        num_scales=1,
    )

    results = dict(
        joints=np.zeros([2, 17, 3]),
        joints_visible=np.ones([2, 17, 3]),
        ann_info=ann_info)

    pipeline = dict(
        type='GenerateInputHeatmaps',
        item='joints',
        visible_item='joints_visible',
        obscured=0.0,
        from_pred=False,
        sigma=3,
        scale=1.0,
        base_size=96,
        target_type='gaussian',
        heatmap_cfg=dict(
            base_scale=0.9,
            offset=0.03,
            threshold=0.6,
            extra=[
                dict(joint_ids=[7, 8], scale_factor=0.5, threshold=0.1),
                dict(
                    joint_ids=[9, 10],
                    scale_factor=0.2,
                    threshold=0.1,
                ),
                dict(
                    joint_ids=[0, 1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16],
                    scale_factor=0.5,
                    threshold=0.05)
            ]))

    expected_shape = (17, heatmap_size[1], heatmap_size[0])

    # Overrides applied on top of the base config, one run per entry. The
    # empty dict is the unmodified base configuration.
    variants = [
        dict(),
        dict(obscured=0.5),
        dict(heatmap_cfg=None),
        dict(from_pred=True),
        dict(from_pred=True, scale=None),
    ]

    for overrides in variants:
        pipeline_copy = copy.deepcopy(pipeline)
        pipeline_copy.update(overrides)
        # Build from the modified copy so the override is actually
        # exercised (building from ``pipeline`` would re-run the base
        # configuration every time).
        pipelines = Compose([pipeline_copy])
        results_ = pipelines(results)
        assert results_['input_heatmaps'][0].shape == expected_shape
|
|
|
|
|
def test_affine_joints():
    """Check that ``AffineJoints`` preserves the joint array shapes.

    The transform is run once with all joints marked visible and once with
    all joints marked invisible; in both cases ``joints`` and
    ``joints_visible`` must keep the shape ``(3, 17, 2)``.
    """
    ann_info = dict(image_size=np.array([800, 640]))

    results = dict(
        center=np.array([180, 144]),
        scale=np.array([360, 288], dtype=np.float32),
        rotation=0.0,
        joints=np.ones((3, 17, 2)),
        joints_visible=np.ones((3, 17, 2)),
        ann_info=ann_info)

    pipeline = Compose([
        dict(
            type='AffineJoints', item='joints', visible_item='joints_visible')
    ])
    results_ = pipeline(results)
    assert results_['joints'].shape == (3, 17, 2)
    assert results_['joints_visible'].shape == (3, 17, 2)

    # All joints invisible: run the transform on the modified copy so this
    # case is actually exercised.
    results_copy = copy.deepcopy(results)
    results_copy['joints_visible'] = np.zeros((3, 17, 2))
    results_ = pipeline(results_copy)
    assert results_['joints'].shape == (3, 17, 2)
    assert results_['joints_visible'].shape == (3, 17, 2)
|
|