---
# Inference settings in three top-level entries: extra keyword arguments for
# the UNet, keyword arguments for the noise scheduler, and the sampler name.
# NOTE(review): the nesting below was reconstructed from the key names
# (`*_kwargs` parents, list items following their list key) -- confirm it
# matches what the loading code expects.

unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  # Presumably the down-sampling factors at which motion modules are
  # inserted -- TODO confirm against the UNet implementation.
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    # The same block type is listed twice on purpose; both entries are kept.
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1

noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "scaled_linear"
  clip_sample: false
  steps_offset: 1
  # Zero-SNR-related options (see `rescale_betas_zero_snr`); presumably meant
  # to be used together -- verify against the scheduler's documentation.
  prediction_type: "v_prediction"
  rescale_betas_zero_snr: true
  timestep_spacing: "trailing"

sampler: DDIM