---
# pytorch_lightning==2.0.9
seed_everything: 33
trainer:
  accelerator: auto
  strategy: auto
  devices: '8'  # quoted on purpose: Lightning parses the string; bare 8 would be an int
  num_nodes: 1
  precision: 16-mixed
  logger: null
  callbacks:
    - class_path: pytorch_lightning.callbacks.RichModelSummary
      init_args:
        max_depth: 1
    - class_path: pytorch_lightning.callbacks.RichProgressBar
      init_args:
        refresh_rate: 1
        leave: false
        theme:
          description: white
          # hex colors must stay quoted: an unquoted leading '#' starts a comment
          progress_bar: '#6206E0'
          progress_bar_finished: '#6206E0'
          progress_bar_pulse: '#6206E0'
          batch_progress: white
          time: grey54
          processing_speed: grey70
          metrics: white
        console_kwargs: null
  fast_dev_run: false
  max_epochs: 5000
  min_epochs: null
  max_steps: 2020000
  min_steps: null
  max_time: null
  limit_train_batches: null
  limit_val_batches: 512
  limit_test_batches: null
  limit_predict_batches: null
  overfit_batches: 0.0
  val_check_interval: 8000
  check_val_every_n_epoch: 1
  num_sanity_val_steps: null
  log_every_n_steps: 10
  enable_checkpointing: null
  enable_progress_bar: null
  enable_model_summary: null
  accumulate_grad_batches: 8
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  deterministic: null
  benchmark: null
  inference_mode: true
  use_distributed_sampler: true
  profiler: null
  detect_anomaly: false
  barebones: false
  plugins: null
  sync_batchnorm: false
  reload_dataloaders_every_n_epochs: 0
  default_root_dir: null
model:
  inference_params:
    class_path: t2v_enhanced.model.pl_module_params_controlnet.InferenceParams
    init_args:
      width: 256
      height: 256
      video_length: 16
      guidance_scale: 7.5
      use_dec_scaling: true
      frame_rate: 8
      num_inference_steps: 50
      eta: 1.0
      n_autoregressive_generations: 1
      mode: long_video
      start_from_real_input: true
      eval_loss_metrics: false
      scheduler_cls: ''
      negative_prompt: ''
      conditioning_from_all_past: false
      validation_samples: 80
      conditioning_type: last_chunk
      result_formats:
        - eval_gif
        - gif
        - mp4
      concat_video: true
  opt_params:
    class_path: t2v_enhanced.model.pl_module_params_controlnet.OptimizerParams
    init_args:
      learning_rate: 5.0e-05
      layers_config:
        class_path: t2v_enhanced.model.requires_grad_setter.LayerConfig
        init_args:
          # Each entry is a [requires_grad_flag, [module path components]] pair.
          # NOTE(review): nesting reconstructed from a flattened scrape; the
          # pair grouping is fixed by the 3-line entries ([false, [base_model,
          # transformer_in]] etc.) — verify against requires_grad_setter.LayerConfig.
          gradient_setup:
            - - false
              - - vae
            - - false
              - - text_encoder
            - - false
              - - image_encoder
            - - true
              - - resampler
            - - true
              - - unet
            - - true
              - - base_model
            - - false
              - - base_model
                - transformer_in
            - - false
              - - base_model
                - temp_attentions
            - - false
              - - base_model
                - temp_convs
      layers_config_base: null
      use_warmup: false
      warmup_steps: 10000
      warmup_start_factor: 1.0e-05
      learning_rate_spatial: 0.0
      use_8_bit_adam: false
      noise_generator: null
      noise_decomposition: null
      perceptual_loss: false
      noise_offset: 0.0
      split_opt_by_node: false
      reset_prediction_type_to_eps: false
      train_val_sampler_may_differ: true
      measure_similarity: false
      similarity_loss: false
      similarity_loss_weight: 1.0
      loss_conditional_weight: 0.0
      loss_conditional_weight_convex: false
      loss_conditional_change_after_step: 0
      mask_conditional_frames: false
      sample_from_noise: true
      mask_alternating: false
      uncondition_freq: -1
      no_text_condition_control: false
      inject_image_into_input: false
      inject_at_T: false
      resampling_steps: 1
      control_freq_in_resample: 1
      resample_to_T: false
      adaptive_loss_reweight: false
      load_resampler_from_ckpt: ''
      skip_controlnet_branch: false
      use_fps_conditioning: false
      num_frame_embeddings_range: 16
      start_frame_training: 16
      start_frame_ctrl: 16
      load_trained_base_model_and_resampler_from_ckpt: ''
      load_trained_controlnet_from_ckpt: ''
  unet_params:
    class_path: t2v_enhanced.model.pl_module_params_controlnet.UNetParams
    init_args:
      conditioning_embedding_out_channels:
        - 32
        - 96
        - 256
        - 512
      ckpt_spatial_layers: ''
      pipeline_repo: damo-vilab/text-to-video-ms-1.7b
      unet_from_diffusers: true
      spatial_latent_input: false
      num_frame_conditioning: 1
      pipeline_class: t2v_enhanced.model.model.controlnet.pipeline_text_to_video_w_controlnet_synth.TextToVideoSDPipeline
      # YAML resolves bare 'none' to the STRING "none" (not null) — kept as-is.
      frame_expansion: none
      downsample_controlnet_cond: true
      num_frames: 16
      pre_transformer_in_cond: false
      num_tranformers: 1  # (sic) key name preserved — must match the UNetParams signature
      zero_conv_3d: false
      merging_mode: addition
      compute_only_conditioned_frames: false
      condition_encoder: ''
      zero_conv_mode: Identity
      clean_model: true
      merging_mode_base: attention_cross_attention
      attention_mask_params: null
      attention_mask_params_base: null
      modelscope_input_format: true
      temporal_self_attention_only_on_conditioning: false
      temporal_self_attention_mask_included_itself: false
      use_post_merger_zero_conv: false
      weight_control_sample: 1.0
      use_controlnet_mask: false
      random_mask_shift: false
      random_mask: false
      use_resampler: true
      unet_from_pipe: false
      unet_operates_on_2d: false
      image_encoder: CLIP
      use_standard_attention_processor: false
      num_frames_before_chunk: 0
      resampler_type: single_frame
      resampler_cls: t2v_enhanced.model.diffusers_conditional.models.controlnet.image_embedder.ImgEmbContextResampler
      resampler_merging_layers: 4
      image_encoder_obj:
        class_path: t2v_enhanced.model.diffusers_conditional.models.controlnet.image_embedder.FrozenOpenCLIPImageEmbedder
        init_args:
          arch: ViT-H-14
          version: laion2b_s32b_b79k
          device: cuda
          max_length: 77
          freeze: true
          antialias: true
          ucg_rate: 0.0
          unsqueeze_dim: false
          repeat_to_max_len: false
          num_image_crops: 0
          output_tokens: false
      # NOTE(review): keys from here down reconstructed as UNetParams init_args
      # (not FrozenOpenCLIPImageEmbedder args) — confirm against the class signature.
      cfg_text_image: false
      aggregation: last_out
      resampler_random_shift: true
      img_cond_alpha_per_frame: false
      num_control_input_frames: 8
      use_image_encoder_normalization: false
      use_of: false
      ema_param: -1.0
      concat: false
      use_image_tokens_main: true
      use_image_tokens_ctrl: false
# Top-level run/experiment settings (consumed by the training script,
# outside the trainer/model namespaces).
result_fol: results
exp_name: my_exp_name
run_name: my_run_name
scale_lr: false
# torch.set_float32_matmul_precision value — presumably passed through; verify against caller.
matmul_precision: high