# LMM/configs/lmm/lmm.py
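# Unified multi-modal motion generation config for the Large Motion Model
# (LMM): a single diffusion backbone trained jointly across motion capture,
# text-to-motion, music-to-dance, speech-to-gesture, and video-to-motion
# datasets (see the task suffixes in the list below).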
dataset_names = [
    'all',
    # motion capture (mocap)
    'amass_mocap', 'motionx_mocap', 'humanact12_mocap', 'uestc_mocap', 'ntu_mocap', 'aist_mocap',
    'beat_mocap', 'tedg_mocap', 'tedex_mocap', 's2g3d_mocap', 'h36m_mocap', 'mpi_mocap',
    # text-to-motion (t2m)
    'humanml3d_t2m', 'kitml_t2m', 'babel_t2m', 'motionx_t2m',
    'humanact12_t2m', 'uestc_t2m', 'ntu_t2m',
    # music-to-dance (m2d)
    'aist_m2d',
    # speech-to-gesture (s2g)
    'beat_s2g', 'tedg_s2g', 'tedex_s2g', 's2g3d_s2g',
    # video-to-motion (v2m)
    'h36m_v2m', 'mpi_v2m'
]
num_datasets = len(dataset_names)
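# NOTE: `num_datasets` is passed to both the backbone and the attention block
# below, presumably so that per-dataset components stay sized in sync with
# the list above.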
# model settings
model = dict(
    type='UnifiedMotionDiffusion',
    model=dict(
        type='LargeMotionModel',
        input_feats=669,        # per-frame motion feature dimension
        max_seq_len=200,        # maximum motion length in frames
        num_parts=10,           # number of body parts in the part-wise latent
        latent_part_dim=64,     # latent channels per body part
        time_embed_dim=2048,
        dataset_names=dataset_names,
        num_layers=4,
        num_cond_layers=2,
        num_datasets=num_datasets,
        dropout=0,
        # part-aware attention block with mixture-of-experts (MoE) routing
        ca_block_cfg=dict(
            type='ArtAttention',
            num_experts=16,     # total experts
            topk=4,             # experts activated per token
            gate_type='cosine_top',
            gate_noise=1.0,
            num_datasets=num_datasets,
            # conditioning modalities handled by the block
            has_text=True,
            has_music=True,
            has_speech=True,
            has_video=True
        ),
        # feature dimensions of the condition embeddings per modality
        text_input_dim=1024,
        music_input_dim=768,
        speech_input_dim=768,
        video_input_dim=1024,
        # guidance configuration (keys match entries of dataset_names)
        guidance_cfg=dict(
            all=dict(type='linear', scale=5.5),
        ),
        moe_route_loss_weight=10.0,     # auxiliary MoE routing loss
        template_kl_loss_weight=0.0001,
        use_pos_embedding=False,
        cond_drop_rate=0.1              # condition dropout for classifier-free guidance
    ),
    # reconstruction loss on the predicted motion
    loss_recon=dict(
        type='KinematicLoss', loss_type='mse', loss_weight=[20], reduction='none'),
    train_repeat=1,
    # diffusion process used during training
    diffusion_train=dict(
        beta_scheduler='linear',
        diffusion_steps=1000,
        model_mean_type='start_x',      # the denoiser predicts x_0 directly
        model_var_type='fixed_large',
    ),
    # diffusion process used at test time; the per-dataset string (keyed like
    # dataset_names) appears to follow guided-diffusion-style timestep
    # respacing, i.e. steps taken per section of the 1000-step schedule
    # (15+15+8+6+6 = 50 DDIM sampling steps)
    diffusion_test_dict=dict(
        base=dict(
            beta_scheduler='linear',
            diffusion_steps=1000,
            model_mean_type='start_x',
            model_var_type='fixed_large',
        ),
        all='15,15,8,6,6'
    ),
    inference_type='ddim',
    loss_reduction='batch',
    # precomputed loss weights loaded from the MotionVerse statistics
    loss_weight='data/motionverse/statistics/loss_weight.npy'
)
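
# Minimal usage sketch. How this config is consumed is an assumption: dict-style
# configs like this one are conventionally loaded with mmcv and passed to a
# model-building registry; the exact builder in this repo may differ.
#
#   from mmcv import Config
#   cfg = Config.fromfile('configs/lmm/lmm.py')
#   model = MODELS.build(cfg.model)  # hypothetical registry call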