import os

import torch

from modules import shared, paths, sd_disable_initialization, devices

sd_configs_path = shared.sd_configs_path
sd_repo_configs_path = os.path.join(paths.paths['Stable Diffusion'], "configs", "stable-diffusion")
sd_xl_repo_configs_path = os.path.join(paths.paths['Stable Diffusion XL'], "configs", "inference")


config_default = shared.sd_default_config
config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml")
config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml")
config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml")
config_sdxl_refiner = os.path.join(sd_xl_repo_configs_path, "sd_xl_refiner.yaml")
config_sdxl_inpainting = os.path.join(sd_configs_path, "sd_xl_inpaint.yaml")
config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml")
config_unclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-l-inference.yaml")
config_unopenclip = os.path.join(sd_repo_configs_path, "v2-1-stable-unclip-h-inference.yaml")
config_inpainting = os.path.join(sd_configs_path, "v1-inpainting-inference.yaml")
config_instruct_pix2pix = os.path.join(sd_configs_path, "instruct-pix2pix.yaml")
config_alt_diffusion = os.path.join(sd_configs_path, "alt-diffusion-inference.yaml")
config_alt_diffusion_m18 = os.path.join(sd_configs_path, "alt-diffusion-m18-inference.yaml")
config_sd3 = os.path.join(sd_configs_path, "sd3-inference.yaml")


def is_using_v_parameterization_for_sd2(state_dict):
    """
    Detects whether unet in state_dict is using v-parameterization. Returns True if it is. You're welcome.
    """

    import ldm.modules.diffusionmodules.openaimodel

    device = devices.device

    with sd_disable_initialization.DisableInitialization():
        unet = ldm.modules.diffusionmodules.openaimodel.UNetModel(
            use_checkpoint=False,
            use_fp16=False,
            image_size=32,
            in_channels=4,
            out_channels=4,
            model_channels=320,
            attention_resolutions=[4, 2, 1],
            num_res_blocks=2,
            channel_mult=[1, 2, 4, 4],
            num_head_channels=64,
            use_spatial_transformer=True,
            use_linear_in_transformer=True,
            transformer_depth=1,
            context_dim=1024,
            legacy=False
        )
        unet.eval()

    with torch.no_grad():
        unet_sd = {k.replace("model.diffusion_model.", ""): v for k, v in state_dict.items() if "model.diffusion_model." in k}
        unet.load_state_dict(unet_sd, strict=True)
        unet.to(device=device, dtype=devices.dtype_unet)

        test_cond = torch.ones((1, 2, 1024), device=device) * 0.5
        x_test = torch.ones((1, 4, 8, 8), device=device) * 0.5

        with devices.autocast():
            out = (unet(x_test, torch.asarray([999], device=device), context=test_cond) - x_test).mean().cpu().item()

    return out < -1
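

# Why the probe in is_using_v_parameterization_for_sd2 works: a v-prediction unet is
# trained to predict v = alpha_t * eps - sigma_t * x, and at t=999 alpha_t is close to
# 0 while sigma_t is close to 1, so its output for any input is roughly -x. With every
# element of x_test at 0.5, (unet(x_test) - x_test).mean() therefore lands around -1
# (in practice just below it), whereas an epsilon-prediction unet, whose output
# approximates zero-mean noise, stays well above the -1 cutoff.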


def guess_model_config_from_state_dict(sd, filename):
    # Identify the architecture by fingerprinting characteristic keys and tensor
    # shapes in the state dict, from newest/most specific to oldest/most generic.
    sd2_cond_proj_weight = sd.get('cond_stage_model.model.transformer.resblocks.0.attn.in_proj_weight', None)
    diffusion_model_input = sd.get('model.diffusion_model.input_blocks.0.0.weight', None)
    sd2_variations_weight = sd.get('embedder.model.ln_final.weight', None)

    if "model.diffusion_model.x_embedder.proj.weight" in sd:
        return config_sd3

    if sd.get('conditioner.embedders.1.model.ln_final.weight', None) is not None:
        if diffusion_model_input.shape[1] == 9:
            return config_sdxl_inpainting
        else:
            return config_sdxl

    if sd.get('conditioner.embedders.0.model.ln_final.weight', None) is not None:
        return config_sdxl_refiner
    elif sd.get('depth_model.model.pretrained.act_postprocess3.0.project.0.bias', None) is not None:
        return config_depth_model
    elif sd2_variations_weight is not None and sd2_variations_weight.shape[0] == 768:
        return config_unclip
    elif sd2_variations_weight is not None and sd2_variations_weight.shape[0] == 1024:
        return config_unopenclip

    if sd2_cond_proj_weight is not None and sd2_cond_proj_weight.shape[1] == 1024:
        # SD2 checkpoints don't record their parameterization, so probe the unet to
        # tell epsilon-prediction models apart from v-prediction ones.
        if diffusion_model_input.shape[1] == 9:
            return config_sd2_inpainting
        elif is_using_v_parameterization_for_sd2(sd):
            return config_sd2v
        else:
            return config_sd2

    if diffusion_model_input is not None:
        if diffusion_model_input.shape[1] == 9:
            return config_inpainting
        if diffusion_model_input.shape[1] == 8:
            return config_instruct_pix2pix

    if sd.get('cond_stage_model.roberta.embeddings.word_embeddings.weight', None) is not None:
        if sd.get('cond_stage_model.transformation.weight').size()[0] == 1024:
            return config_alt_diffusion_m18
        return config_alt_diffusion

    return config_default


def find_checkpoint_config(state_dict, info):
    if info is None:
        return guess_model_config_from_state_dict(state_dict, "")

    config = find_checkpoint_config_near_filename(info)
    if config is not None:
        return config

    return guess_model_config_from_state_dict(state_dict, info.filename)


def find_checkpoint_config_near_filename(info):
    if info is None:
        return None

    config = f"{os.path.splitext(info.filename)[0]}.yaml"
    if os.path.exists(config):
        return config

    return None
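

if __name__ == "__main__":
    # Illustrative sketch rather than part of the original module: guess the config for
    # a checkpoint passed on the command line. This assumes an initialized webui
    # environment (the module-level imports require one); real callers in
    # modules.sd_models pass a CheckpointInfo object as `info` instead of None.
    import sys

    import safetensors.torch

    state_dict = safetensors.torch.load_file(sys.argv[1])
    print(find_checkpoint_config(state_dict, None))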