# Source: SynTalker / configs/diffusion_rvqvae_128.yaml
# Hosting-page residue, kept as comments so the file parses as YAML:
#   robinwitch's picture
#   update
#   1da48bb
#   raw
#   history blame
#   2.21 kB
# run switches and identifiers
is_train: true
ddp: false
use_trans: true
variational: false
stat: ts
project: s2g
decay_epoch: 500

# filesystem locations
root_path: ./
out_path: ./outputs/audio2pose/
data_path: ./datasets/BEAT_SMPL/beat_v2.0.0/beat_english_v2.0.0/
data_path_1: ./datasets/hub/
test_ckpt: ./ckpt/beatx2_cospeech_diffusion/last_500.bin

# evaluation model (eval_model / e_* keys)
eval_model: motion_representation
e_name: VAESKConv
e_path: weights/AESKConv_240_100.bin

# pose normalisation switch and the mean/std files for pose and trans
pose_norm: true
mean_pose_path: ./mean_std/beatx_2_330_mean.npy
std_pose_path: ./mean_std/beatx_2_330_std.npy
mean_trans_path: ./mean_std/beatx_2_trans_mean.npy
std_trans_path: ./mean_std/beatx_2_trans_std.npy

# vqvae_* keys: type, scales and one checkpoint path each for
# upper / hands / lower / lower_trans
vqvae_type: rvqvae
vqvae_squeeze_scale: 4
vqvae_latent_scale: 5
vqvae_upper_path: ./ckpt/beatx2_rvqvae/RVQVAE_upper/net_300000.pth
vqvae_hands_path: ./ckpt/beatx2_rvqvae/RVQVAE_hands/net_300000.pth
vqvae_lower_path: ./ckpt/beatx2_rvqvae/RVQVAE_lower/net_300000.pth
vqvae_lower_trans_path: ./ckpt/beatx2_rvqvae/RVQVAE_lower_trans/net_300000.pth

# vae_* keys
vae_length: 240
vae_codebook_size: 256
vae_layer: 4
vae_grow: [1, 1, 2, 1]
vae_test_len: 32
vae_test_dim: 330
vae_test_stride: 20
# data config
# Only speaker 2 is listed. The original kept this longer list commented out:
#   [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
training_speakers: [2]
dataset: beat_sep_lower
additional_data: false

# cache location and the new_cache switch
cache_path: datasets/beat_cache/beat_smplx_en_emage_2_128/
new_cache: false
# motion config
ori_joints: beat_smplx_joints
tar_joints: beat_smplx_full
pose_rep: smplxflame_30
pose_fps: 30
pose_dims: 330
pose_length: 128
test_length: 128
pre_frames: 4
stride: 20
rot6d: true
motion_f: 256
m_pre_encoder: null
m_encoder: null
m_fix_pre: false

# audio config (audio_* keys)
audio_rep: onset+amplitude
audio_sr: 16000
audio_fps: 16000
audio_norm: false
audio_f: 256

# word config (word_* and t_* keys)
word_rep: textgrid
word_index_num: 11195
word_dims: 300
word_f: 256
freeze_wordembed: false
t_pre_encoder: fasttext
t_encoder: null
t_fix_pre: false

# facial config (facial_* and f_* keys)
facial_rep: smplxflame_30
facial_dims: 100
facial_norm: false
facial_f: 0
f_pre_encoder: null
f_encoder: null
f_fix_pre: false

# id config (id_rep / speaker_f)
id_rep: onehot
speaker_f: 0
# optimisation settings
batch_size: 40
# Keep the explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) resolve a
# bare `5e-5` as the string "5e-5", whereas `5.0e-5` loads as a float.
lr_base: 5.0e-5
grad_norm: 0.99
epochs: 2000
test_period: 20

# model / trainer selection
model: denoiser
g_name: MDM
trainer: diffusion_rvqvae
hidden_size: 768
n_layer: 1
rec_weight: 1

# NOTE(review): the l* and c* keys share the suffixes l / f / u / h, which
# presumably name body parts — meaning and units are not visible in this
# file; confirm against the trainer before changing them.
ll: 3
lf: 3
lu: 3
lh: 3
cl: 1
cf: 0
cu: 1
ch: 1