File size: 2,197 Bytes
373af33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Dataset identifiers, grouped by name suffix. The list order is kept
# exactly as declared; `num_datasets` below is derived from it.
dataset_names = (
    # Aggregate entry.
    ['all']
    # `_mocap` entries.
    + [
        'amass_mocap',
        'motionx_mocap',
        'humanact12_mocap',
        'uestc_mocap',
        'ntu_mocap',
        'aist_mocap',
        'beat_mocap',
        'tedg_mocap',
        'tedex_mocap',
        's2g3d_mocap',
        'h36m_mocap',
        'mpi_mocap',
    ]
    # `_t2m` entries (presumably text-to-motion -- confirm with the loader).
    + [
        'humanml3d_t2m',
        'kitml_t2m',
        'babel_t2m',
        'motionx_t2m',
        'humanact12_t2m',
        'uestc_t2m',
        'ntu_t2m',
    ]
    # `_m2d` entry (presumably music-to-dance -- confirm).
    + ['aist_m2d']
    # `_s2g` entries (presumably speech-to-gesture -- confirm).
    + ['beat_s2g', 'tedg_s2g', 'tedex_s2g', 's2g3d_s2g']
    # `_v2m` entries (presumably video-to-motion -- confirm).
    + ['h36m_v2m', 'mpi_v2m']
)
# Total number of identifiers above; the 'all' entry is counted too.
num_datasets = len(dataset_names)

# model settings
model = {
    'type': 'UnifiedMotionDiffusion',
    # Nested config of the inner network.
    'model': {
        'type': 'LargeMotionModel',
        'input_feats': 669,
        'max_seq_len': 200,
        'num_parts': 10,
        'latent_part_dim': 64,
        'time_embed_dim': 2048,
        # Same list object as the module-level `dataset_names`.
        'dataset_names': dataset_names,
        'num_layers': 4,
        'num_cond_layers': 2,
        'num_datasets': num_datasets,
        'dropout': 0,
        # Attention block config; every has_* condition flag is enabled.
        'ca_block_cfg': {
            'type': 'ArtAttention',
            'num_experts': 16,
            'topk': 4,
            'gate_type': 'cosine_top',
            'gate_noise': 1.0,
            'num_datasets': num_datasets,
            'has_text': True,
            'has_music': True,
            'has_speech': True,
            'has_video': True,
        },
        # Input feature sizes, one per conditioning modality.
        'text_input_dim': 1024,
        'music_input_dim': 768,
        'speech_input_dim': 768,
        'video_input_dim': 1024,
        # Only an 'all' entry is configured for guidance.
        'guidance_cfg': {
            'all': {'type': 'linear', 'scale': 5.5},
        },
        'moe_route_loss_weight': 10.0,
        'template_kl_loss_weight': 0.0001,
        'use_pos_embedding': False,
        'cond_drop_rate': 0.1,
    },
    'loss_recon': {
        'type': 'KinematicLoss',
        'loss_type': 'mse',
        'loss_weight': [20],
        'reduction': 'none',
    },
    'train_repeat': 1,
    # Training-time diffusion settings.
    'diffusion_train': {
        'beta_scheduler': 'linear',
        'diffusion_steps': 1000,
        'model_mean_type': 'start_x',
        'model_var_type': 'fixed_large',
    },
    # Test-time diffusion settings; `base` repeats the training values.
    'diffusion_test_dict': {
        'base': {
            'beta_scheduler': 'linear',
            'diffusion_steps': 1000,
            'model_mean_type': 'start_x',
            'model_var_type': 'fixed_large',
        },
        # NOTE(review): comma-separated string, presumably a per-section
        # step-respacing spec -- confirm against the code that consumes it.
        'all': '15,15,8,6,6',
    },
    'inference_type': 'ddim',
    'loss_reduction': 'batch',
    # Path to a .npy file; how it is loaded is not visible in this config.
    'loss_weight': 'data/motionverse/statistics/loss_weight.npy',
}