---
# Top-level sections: `unet_additional_kwargs`, `noise_scheduler_kwargs`, and
# `sampler`.
# NOTE(review): nesting was rebuilt from a copy whose indentation had been
# flattened; confirm the key grouping against the code that loads this file.

unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1

noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  clip_sample: false
  steps_offset: 1

  # NOTE(review): presumably these three belong together as the
  # zero-terminal-SNR setup and stay under `noise_scheduler_kwargs` — confirm.
  prediction_type: "v_prediction"
  rescale_betas_zero_snr: true
  timestep_spacing: "trailing"

sampler: DDIM