Update app.py
app.py CHANGED
@@ -10,10 +10,41 @@ from diffusers.utils import load_image
 from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, StableDiffusionXLControlNetPipeline, ControlNetModel
 from diffusers.utils import load_image
 from diffusers.image_processor import IPAdapterMaskProcessor
+from torch import nn
+
+### auxiliary functions
+
+def ip_guide(guide, pool):
+    distances = []
+    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
+    for embed in pool:
+        dist = cos(guide, embed.to('cuda'))
+        distances.append(dist)
+    ### find the indexes of the top 5 embeddings
+    indexed_distances = list(enumerate(distances))
+    # Sort the list of pairs based on the scores
+    sorted_distances = sorted(indexed_distances, key=lambda x: x[1])
+    # Extract the indexes of the lowest scores
+    lowest_indexes = [index for index, score in sorted_distances[:5]]
+
+    ### return the embeddings with lowest_indexes
+    return [pool[i] for i in lowest_indexes], lowest_indexes
+
+
+def make_inpaint_condition(image, image_mask):
+    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
+    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
+
+    assert image.shape[0:1] == image_mask.shape[0:1]
+    image[image_mask > 0.5] = -1.0  # set as masked pixel
+    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
+    image = torch.from_numpy(image)
+    return image
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
 
+
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
@@ -31,7 +62,7 @@ controlnets = [
 
 ###load pipelines
 
-pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=controlnets, use_safetensors=True, variant='fp16')
+pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=[controlnets[0],controlnets[0],controlnets[1]], use_safetensors=True, variant='fp16')
 pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe_CN.scheduler=DPMSolverMultistepScheduler.from_pretrained("SG161222/RealVisXL_V5.0",subfolder="scheduler",use_karras_sigmas=True)
 pipe_CN.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
@@ -39,31 +70,52 @@ pipe_CN.to("cuda")
 
 ##############################load loras
 
-
-
+state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict('Tonioesparza/ourhood_training_dreambooth_lora_2_0', weight_name='pytorch_lora_weights.safetensors')
+pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder ,prefix='2', adapter_name='text_2_ourhood')
+pipe_CN.set_adapters(["unet_ourhood","text_ourhood","text_2_ourhood"], adapter_weights=[1.0, 1.0,1.0])
+
 pipe_CN.fuse_lora()
 
 refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",text_encoder_2=pipe_CN.text_encoder_2,vae=pipe_CN.vae,torch_dtype=torch.float16,use_safetensors=True,variant="fp16")
 refiner.to("cuda")
 
+ip_pool = torch.load("https://huggingface.co/spaces/Tonioesparza/hb8_ourhood_pilot/resolve/main/embeds_cases_for_ip.pt")
 
+pool = list(ip_pool.values())
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 
-    ###pro_encode = pipe_cn.encode_text(prompt)
+    ###pro_encode = pipe_cn.encode_text(prompt) ###ip_images init
+
+    guide = pipe_CN.encode_prompt(prompt)
+
+    closest,indexes = ip_guide(guide[2],pool)
+
+    print( [list(ip_pool.keys())[i] for i in indexes])
+
+    ip_embeds = torch.cat([torch.unsqueeze(torch.zeros_like(closest[0]),0),torch.unsqueeze(closest[0],0)],0).to(dtype=torch.float16,device='cuda')
+
+    pipe_CN.set_ip_adapter_scale([[0.5]])
+
+    prompt1 = 'A frontpage photograph of an ourhood privacy booth, silken oak frame, taupe exterior fabric, taupe interior fabric, in ' + prompt
 
     ### function has no formats defined
 
     scaff_dic={1:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_square_2.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_square_2.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_noroof_square.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_solo_square.png"},
                2:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_C.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_C.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_C.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_C_solo.png"},
                3:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_B.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_B.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_B.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_B_solo.png"}}
     ### mask init
@@ -72,53 +124,47 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
     output_width = 1024
 
     mask1 = load_image(scaff_dic[scaffold]['mask1'])
+    mask2 = load_image(scaff_dic[scaffold]['mask2'])
 
     masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
     masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]
 
-    ###ip_images init
-
-    ip_img_1 = load_image("https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/25hours-hotel_25h_IndreBy_StephanLemke_Sauna1-1024x768.png")
-
-    ip_images = [[ip_img_1]]
-    pipe_CN.set_ip_adapter_scale([[0.5]])
-
-    n_steps = num_inference_steps
-
     ###precomputed depth image
 
     depth_image = load_image(scaff_dic[scaffold]['depth_image'])
     canny_image = load_image(scaff_dic[scaffold]['canny_image'])
+    masked_depth=make_inpaint_condition(depth_image,mask2)
 
     images_CN = [depth_image, canny_image]
 
-
 
     ### inference
 
+    n_steps = num_inference_steps
+
     generator = torch.Generator(device="cuda").manual_seed(seed)
 
     results = pipe_CN(
-        prompt=
-
+        prompt=prompt1,
+        ip_adapter_image_embeds = [ip_embeds],
         negative_prompt="deformed, ugly, wrong proportion, low res, worst quality, low quality,text,watermark",
         generator=generator,
        num_inference_steps=n_steps,
        num_images_per_prompt=1,
        denoising_end=0.95,
-        image=
+        image=[depth_image,masked_depth,canny_image],
        output_type="latent",
-        control_guidance_start=[0.0, 0.35],
-        control_guidance_end=[0.35,
-        controlnet_conditioning_scale=[0.
+        control_guidance_start=[0.0, 0.35, 0.35],
+        control_guidance_end=[0.35, 0.95, 0.95],
+        controlnet_conditioning_scale=[0.35,0.45, 0.65],
        cross_attention_kwargs={"ip_adapter_masks": masks}
     ).images[0]
 
 
     image = refiner(
-        prompt=
+        prompt=prompt1,
        generator=generator,
-        num_inference_steps=
+        num_inference_steps=n_steps,
        denoising_start=0.95,
        image=results,
     ).images[0]
@@ -130,10 +176,10 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 #@spaces.GPU #[uncomment to use ZeroGPU]
 
 examples = [
-    "
-    "
-    "
-    "
+    "in a British museum, pavillion, masonry, high-tables and chairs",
+    "in a high ceilinged atrium, glass front, plantwalls, concrete floor, furniture, golden hour",
+    "in a colorful open office environment",
+    " in a Nordic atrium environment"]
 
 css="""
 #col-container {
@@ -152,10 +198,10 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             prompt = gr.Text(
-                label="
+                label="Setting prompt",
                 show_label=False,
                 max_lines=1,
-                placeholder="
+                placeholder="Where do you want to show the Ourhood pod?",
                 container=False,
             )
 
@@ -174,7 +220,7 @@ with gr.Blocks(css=css) as demo:
            )
 
            seed = gr.Slider(
-                label="
+                label="Tracking number (seed)",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
@@ -186,11 +232,11 @@ with gr.Blocks(css=css) as demo:
 
 
            num_inference_steps = gr.Slider(
-                label="
+                label="Detail steps",
                minimum=35,
-                maximum=
+                maximum=75,
                step=1,
-                value=
+                value=50, #Replace with defaults that work for your model
            )
 
            gr.Examples(
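
For reference, a minimal, self-contained sketch of the retrieval step that the new ip_guide helper performs: it scores a pool of pooled embeddings against a prompt embedding with cosine similarity and keeps the five with the smallest scores, in the same ascending ordering used above. The pool size, the (1, 1280) tensor shape, the CPU device, and the top_k/device parameters are illustrative assumptions; in app.py the pool comes from embeds_cases_for_ip.pt and the query from pipe_CN.encode_prompt(prompt).

# Illustrative sketch only -- dummy data, not the Space's real embeddings.
import torch
from torch import nn

def ip_guide(guide, pool, top_k=5, device="cpu"):
    # one cosine-similarity score per pooled embedding in the pool
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    distances = [cos(guide, embed.to(device)) for embed in pool]
    # sort ascending, mirroring the ordering in app.py
    sorted_distances = sorted(enumerate(distances), key=lambda x: x[1])
    lowest_indexes = [index for index, _ in sorted_distances[:top_k]]
    return [pool[i] for i in lowest_indexes], lowest_indexes

# dummy pool of ten pooled embeddings shaped like SDXL pooled prompt embeds
pool = [torch.randn(1, 1280) for _ in range(10)]
query = torch.randn(1, 1280)

closest, indexes = ip_guide(query, pool)
print(indexes)  # indexes of the five selected embeddings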