# Hugging Face Space file "app.py" by Tonioesparza
# (commit a486e08 "Update app.py", verified; 9.89 kB).
import gradio as gr
import numpy as np
import random
#import spaces #[uncomment to use ZeroGPU]
import os
from PIL import Image, ImageDraw, ImageFont
import torch
from PIL import Image
from diffusers.utils import load_image
from diffusers import DPMSolverSDEScheduler
from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, StableDiffusionXLControlNetPipeline, ControlNetModel
from diffusers.utils import load_image
from diffusers.image_processor import IPAdapterMaskProcessor
from torch import nn
### auxiliary functions
def ip_guide(guide, pool, top_k=5):
    """Pick the embeddings in ``pool`` that are most similar to ``guide``.

    Similarity is the cosine similarity computed along ``dim=1``; each
    comparison must yield a single value (i.e. ``guide`` holds one row).

    Args:
        guide: Reference embedding tensor.
        pool: Sequence of candidate embedding tensors.
        top_k: Maximum number of matches to return. Defaults to 5.

    Returns:
        A tuple ``(embeddings, indexes)``: the best-matching entries of
        ``pool`` (most similar first) and their positions in ``pool``.
    """
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    # Compare on whatever device the guide lives on instead of a hard-coded
    # "cuda", so the helper also works on CPU-only hosts.
    similarities = [cos(guide, embed.to(guide.device)).item() for embed in pool]
    # Cosine similarity GROWS with closeness, so the best matches are the
    # highest scores; sorting ascending would return the least similar ones.
    ranked = sorted(
        range(len(similarities)),
        key=similarities.__getitem__,
        reverse=True,
    )
    best_indexes = ranked[:top_k]
    return [pool[i] for i in best_indexes], best_indexes
def make_inpaint_condition(image, image_mask):
    """Turn an image and a mask into an inpainting condition tensor.

    The image is converted to RGB and scaled to ``[0, 1]``; every pixel whose
    mask value (converted to mode ``"L"`` and scaled to ``[0, 1]``) exceeds
    0.5 is overwritten with ``-1.0`` in all three channels.

    Args:
        image: Image object exposing ``convert("RGB")`` (e.g. a PIL image).
        image_mask: Image object exposing ``convert("L")`` with the same
            height and width as ``image``.

    Returns:
        A float32 ``torch.Tensor`` laid out as ``(1, 3, height, width)``.

    Raises:
        ValueError: If ``image`` and ``image_mask`` differ in height or width.
    """
    pixels = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
    # Compare BOTH spatial dimensions (height and width). An explicit raise is
    # used instead of ``assert`` because asserts are stripped under ``-O``.
    if pixels.shape[:2] != mask.shape[:2]:
        raise ValueError(
            "image and image_mask must have the same size, got "
            f"{pixels.shape[:2]} and {mask.shape[:2]}"
        )
    pixels[mask > 0.5] = -1.0  # set as masked pixel
    # HWC -> 1CHW
    pixels = np.expand_dims(pixels, 0).transpose(0, 3, 1, 2)
    return torch.from_numpy(pixels)
# ---------------------------------------------------------------------------
# Runtime configuration and model loading (runs once at import time).
# ---------------------------------------------------------------------------

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Not referenced by the code visible in this file; kept for compatibility.
model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use

# Half precision on CUDA, full precision on CPU.
if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

processor_mask = IPAdapterMaskProcessor()

# Index 0: depth ControlNet, index 1: canny ControlNet.
controlnets = [
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-depth-sdxl-1.0",
        variant="fp16",
        use_safetensors=True,
        torch_dtype=torch_dtype,
    ),
    ControlNetModel.from_pretrained(
        "diffusers/controlnet-canny-sdxl-1.0",
        torch_dtype=torch_dtype,
        use_safetensors=True,
        variant="fp16",
    ),
]

###load pipelines
# The depth ControlNet is registered twice, followed by the canny one; the
# inference call supplies three conditioning images in the same order.
pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained(
    "SG161222/RealVisXL_V5.0",
    torch_dtype=torch_dtype,
    controlnet=[controlnets[0], controlnets[0], controlnets[1]],
    use_safetensors=True,
    variant='fp16',
)
###pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
pipe_CN.scheduler = DPMSolverSDEScheduler.from_pretrained(
    "SG161222/RealVisXL_V5.0",
    subfolder="scheduler",
    use_karras_sigmas=True,
)
pipe_CN.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter_sdxl.bin",
)
# Honour the detected device/dtype instead of a hard-coded "cuda"/float16, so
# the values computed above are actually used.
pipe_CN.to(device)

##############################load loras
state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict(
    'Tonioesparza/ourhood_training_dreambooth_lora_2_0',
    weight_name='pytorch_lora_weights.safetensors',
)
pipe_CN.load_lora_into_unet(
    state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_ourhood'
)
pipe_CN.load_lora_into_text_encoder(
    state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_ourhood'
)
# NOTE(review): this call targets pipe_CN.text_encoder again with prefix='2';
# if the second text encoder was intended, it presumably should receive
# pipe_CN.text_encoder_2 - confirm against the diffusers LoRA loader before
# changing. Left exactly as in the original.
pipe_CN.load_lora_into_text_encoder(
    state_dict,
    network_alphas,
    pipe_CN.text_encoder,
    prefix='2',
    adapter_name='text_2_ourhood',
)
pipe_CN.set_adapters(
    ["unet_ourhood", "text_ourhood", "text_2_ourhood"],
    adapter_weights=[1.0, 1.0, 1.0],
)
pipe_CN.fuse_lora()

# The refiner reuses the base pipeline's second text encoder and VAE.
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_CN.text_encoder_2,
    vae=pipe_CN.vae,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
)
refiner.to(device)

# NOTE(security): torch.load unpickles the file - only load trusted data here.
ip_pool = torch.load("./embeds_cases_for_ip.pt")
pool = list(ip_pool.values())

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024
# Per-perspective ("scaffold") conditioning assets: two masks plus the
# precomputed depth and canny images. Built once instead of on every call.
_SCAFFOLD_ASSETS = {
    1: {
        'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_square_2.png",
        'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_square_2.png",
        'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_noroof_square.png",
        'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_solo_square.png",
    },
    2: {
        'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_C.png",
        'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_C.png",
        'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_C.png",
        'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_C_solo.png",
    },
    3: {
        'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_B.png",
        'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_B.png",
        'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_B.png",
        'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_B_solo.png",
    },
}


def ourhood_inference(
    prompt: str = "",
    num_inference_steps: int = 50,
    scaffold: int = 1,
    seed: int = 0,
):
    """Render an OurHood booth in the setting described by ``prompt``.

    The prompt is encoded and matched against the precomputed embedding pool
    via ``ip_guide``; the first match drives the IP-Adapter. The ControlNet
    pipeline ``pipe_CN`` then generates latents that ``refiner`` finishes.

    Args:
        prompt: Description of the setting, appended to the fixed product
            prompt.
        num_inference_steps: Step count passed to both pipelines.
        scaffold: Key selecting the mask/depth/canny asset set (1, 2 or 3).
        seed: Seed for the random generator shared by both pipelines.

    Returns:
        The first image of the refiner pipeline's output.

    Raises:
        ValueError: If ``scaffold`` is not a known asset set.
    """
    # UI sliders may deliver numeric values as floats (TODO confirm for the
    # Gradio version in use); the seed and step count must be integers.
    num_inference_steps = int(num_inference_steps)
    scaffold = int(scaffold)
    seed = int(seed)

    try:
        assets = _SCAFFOLD_ASSETS[scaffold]
    except KeyError:
        raise ValueError(
            f"unknown scaffold {scaffold!r}; expected one of {sorted(_SCAFFOLD_ASSETS)}"
        ) from None

    # Follow the pipeline's own placement rather than hard-coding "cuda"/fp16.
    run_device = pipe_CN.device
    run_dtype = pipe_CN.dtype

    ### ip_images init
    guide = pipe_CN.encode_prompt(prompt)
    # guide[2] is the third element of encode_prompt's result - presumably the
    # pooled prompt embedding (verify against the diffusers version in use).
    closest, indexes = ip_guide(guide[2], pool)
    pool_names = list(ip_pool.keys())
    print([pool_names[i] for i in indexes])

    # Stack a zero embedding in front of the best match - presumably the
    # unconditional half used for classifier-free guidance (TODO confirm).
    best_match = closest[0]
    ip_embeds = torch.cat(
        [torch.unsqueeze(torch.zeros_like(best_match), 0), torch.unsqueeze(best_match, 0)],
        0,
    ).to(dtype=run_dtype, device=run_device)
    pipe_CN.set_ip_adapter_scale([[0.5]])

    # NOTE(review): the fixed prompt already ends with "in " while the bundled
    # UI examples also start with "in", yielding "in in ..." - confirm intent.
    prompt1 = 'A photograph, of an OurHood privacy booth, with a silken oak frame, hickory stained melange polyester fabric, in ' + prompt

    ### mask init
    output_height = 1024
    output_width = 1024
    mask1 = load_image(assets['mask1'])
    mask2 = load_image(assets['mask2'])
    masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
    masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]

    ###precomputed depth image
    depth_image = load_image(assets['depth_image'])
    canny_image = load_image(assets['canny_image'])
    masked_depth = make_inpaint_condition(depth_image, mask2)

    ### inference
    generator = torch.Generator(device=run_device).manual_seed(seed)
    # The base pipeline stops at 95% of the schedule and returns latents ...
    latents = pipe_CN(
        prompt=prompt1,
        ip_adapter_image_embeds=[ip_embeds],
        negative_prompt="deformed, ugly, wrong proportion, low res, worst quality, low quality,text,watermark",
        generator=generator,
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=1,
        denoising_end=0.95,
        image=[depth_image, masked_depth, canny_image],
        output_type="latent",
        control_guidance_start=[0.0, 0.35, 0.35],
        control_guidance_end=[0.35, 0.95, 0.95],
        controlnet_conditioning_scale=[0.35, 0.95, 0.95],
        cross_attention_kwargs={"ip_adapter_masks": masks},
    ).images[0]
    # ... and the refiner picks up from the same point.
    image = refiner(
        prompt=prompt1,
        generator=generator,
        num_inference_steps=num_inference_steps,
        denoising_start=0.95,
        image=latents,
    ).images[0]
    return image
#@spaces.GPU #[uncomment to use ZeroGPU]
# NOTE: a decorator only takes effect directly above the function it wraps;
# move the line above to just before `def ourhood_inference` when enabling it.

# Prompt suggestions shown in the UI.
examples = [
    "in a British museum, pavillion, masonry, high-tables and chairs",
    "in a high ceilinged atrium, glass front, plantwalls, concrete floor, furniture, golden hour",
    "in a colorful open office environment",
    " in a Nordic atrium environment",
]

# Stylesheet passed to gr.Blocks; targets the element with id "col-container".
css="""
#col-container {
margin: 0 auto;
max-width: 640px;
}
"""

# NOTE(review): the source's indentation was lost; the nesting below was
# reconstructed from the statement order - confirm against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
# HB8-Ourhood inference test
""")

        # Prompt box and run button side by side.
        with gr.Row():
            prompt = gr.Text(
                label="Setting prompt",
                show_label=False,
                max_lines=1,
                placeholder="Where do you want to show the Ourhood pod?",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Passed to ourhood_inference as its `scaffold` argument.
            perspective = gr.Slider(
                label="perspective", minimum=1, maximum=3, step=1, value=1,
            )
            seed = gr.Slider(
                label="Tracking number (seed)",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            with gr.Row():
                num_inference_steps = gr.Slider(
                    label="Detail steps",
                    minimum=35,
                    maximum=75,
                    step=1,
                    value=50, #Replace with defaults that work for your model
                )

        gr.Examples(examples=examples, inputs=[prompt])

    # Both the button click and pressing Enter in the prompt box run inference.
    # Input order matches ourhood_inference(prompt, num_inference_steps, scaffold, seed).
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=ourhood_inference,
        inputs=[prompt, num_inference_steps, perspective, seed],
        outputs=[result],
    )

demo.queue().launch()