---
# Inference configuration: locations of pretrained weights/checkpoints plus
# runtime settings. One mapping entry per line so the file parses as a single
# flat YAML mapping.

# Base model, VAE and image encoder.
pretrained_base_model_path: "./pretrained_weights/sd-image-variations-diffusers"
pretrained_vae_path: "./pretrained_weights/sd-vae-ft-mse"
image_encoder_path: "./pretrained_weights/sd-image-variations-diffusers/image_encoder"

# Checkpoint files (.pth).
denoising_unet_path: "./pretrained_weights/denoising_unet-45000.pth"
reference_unet_path: "./pretrained_weights/reference_unet-45000.pth"
face_locator_path: "./pretrained_weights/face_locator-45000.pth"
# NOTE(review): this is the only absolute path in the file; every other entry
# lives under ./pretrained_weights. It looks machine-specific — confirm it
# exists on the target host or point it at a local copy.
motion_module_path: "/nas2/juzhen.czy/exp_output_0613/stage2_whisper_xinwenlianbo_HDTF_pretrain_auged/motion_module-40000.pth"
audio_mapper_path: "./pretrained_weights/audio_mapper-50000.pth"

# NOTE(review): the key is spelled "auido" (presumably "audio"). It is kept
# as-is because the consuming code presumably looks up this exact name —
# verify against the loader before renaming.
auido_guider_path: "./pretrained_weights/wav2vec2-base-960h"
auto_flow_path: "./pretrained_weights/AutoFlow"

# Runtime settings.
inference_config: "./configs/inference/inference_v2.yaml"
weight_dtype: 'fp16'