Update app.py
app.py CHANGED
@@ -10,10 +10,41 @@ from diffusers.utils import load_image
 from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, StableDiffusionXLControlNetPipeline, ControlNetModel
 from diffusers.utils import load_image
 from diffusers.image_processor import IPAdapterMaskProcessor
+from torch import nn
+
+### auxiliary functions
+
+def ip_guide(guide, pool):
+    distances = []
+    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
+    for embed in pool:
+        dist = cos(guide, embed.to('cuda'))
+        distances.append(dist)
+    ### find the indexes of the top 5 embeddings
+    indexed_distances = list(enumerate(distances))
+    # Sort the list of pairs based on the scores
+    sorted_distances = sorted(indexed_distances, key=lambda x: x[1])
+    # Extract the indexes of the lowest scores
+    lowest_indexes = [index for index, score in sorted_distances[:5]]
+
+    ### return the embeddings with lowest_indexes
+    return [pool[i] for i in lowest_indexes], lowest_indexes
+
+
+def make_inpaint_condition(image, image_mask):
+    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
+    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
+
+    assert image.shape[0:1] == image_mask.shape[0:1]
+    image[image_mask > 0.5] = -1.0  # set as masked pixel
+    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
+    image = torch.from_numpy(image)
+    return image
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
 
+
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
@@ -31,7 +62,7 @@ controlnets = [
 
 ###load pipelines
 
-pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=controlnets, use_safetensors=True, variant='fp16')
+pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=[controlnets[0],controlnets[0],controlnets[1]], use_safetensors=True, variant='fp16')
 pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe_CN.scheduler=DPMSolverMultistepScheduler.from_pretrained("SG161222/RealVisXL_V5.0",subfolder="scheduler",use_karras_sigmas=True)
 pipe_CN.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
@@ -39,31 +70,52 @@ pipe_CN.to("cuda")
 
 ##############################load loras
 
-
-
+state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict('Tonioesparza/ourhood_training_dreambooth_lora_2_0', weight_name='pytorch_lora_weights.safetensors')
+pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder ,prefix='2', adapter_name='text_2_ourhood')
+pipe_CN.set_adapters(["unet_ourhood","text_ourhood","text_2_ourhood"], adapter_weights=[1.0, 1.0,1.0])
+
 pipe_CN.fuse_lora()
 
 refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",text_encoder_2=pipe_CN.text_encoder_2,vae=pipe_CN.vae,torch_dtype=torch.float16,use_safetensors=True,variant="fp16")
 refiner.to("cuda")
 
+ip_pool = torch.load("https://huggingface.co/spaces/Tonioesparza/hb8_ourhood_pilot/resolve/main/embeds_cases_for_ip.pt")
 
+pool = list(ip_pool.values())
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 
-    ###pro_encode = pipe_cn.encode_text(prompt)
+    ###pro_encode = pipe_cn.encode_text(prompt) ###ip_images init
+
+    guide = pipe_CN.encode_prompt(prompt)
+
+    closest,indexes = ip_guide(guide[2],pool)
+
+    print( [list(ip_pool.keys())[i] for i in indexes])
+
+    ip_embeds = torch.cat([torch.unsqueeze(torch.zeros_like(closest[0]),0),torch.unsqueeze(closest[0],0)],0).to(dtype=torch.float16,device='cuda')
+
+    pipe_CN.set_ip_adapter_scale([[0.5]])
+
+    prompt1 = 'A frontpage photograph of an ourhood privacy booth, silken oak frame, taupe exterior fabric, taupe interior fabric, in ' + prompt
 
     ### function has no formats defined
 
     scaff_dic={1:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_square_2.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_square_2.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_noroof_square.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_solo_square.png"},
                2:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_C.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_C.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_C.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_C_solo.png"},
                3:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_B.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_B.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_B.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_B_solo.png"}}
     ### mask init
@@ -72,53 +124,47 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
     output_width = 1024
 
     mask1 = load_image(scaff_dic[scaffold]['mask1'])
+    mask2 = load_image(scaff_dic[scaffold]['mask2'])
 
     masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
     masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]
 
-    ###ip_images init
-
-    ip_img_1 = load_image("https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/25hours-hotel_25h_IndreBy_StephanLemke_Sauna1-1024x768.png")
-
-    ip_images = [[ip_img_1]]
-    pipe_CN.set_ip_adapter_scale([[0.5]])
-
-    n_steps = num_inference_steps
-
     ###precomputed depth image
 
     depth_image = load_image(scaff_dic[scaffold]['depth_image'])
     canny_image = load_image(scaff_dic[scaffold]['canny_image'])
+    masked_depth=make_inpaint_condition(depth_image,mask2)
 
     images_CN = [depth_image, canny_image]
 
-
 
     ### inference
 
+    n_steps = num_inference_steps
+
     generator = torch.Generator(device="cuda").manual_seed(seed)
 
     results = pipe_CN(
-        prompt=
-
+        prompt=prompt1,
+        ip_adapter_image_embeds = [ip_embeds],
         negative_prompt="deformed, ugly, wrong proportion, low res, worst quality, low quality,text,watermark",
         generator=generator,
        num_inference_steps=n_steps,
        num_images_per_prompt=1,
        denoising_end=0.95,
-        image=
+        image=[depth_image,masked_depth,canny_image],
        output_type="latent",
-        control_guidance_start=[0.0, 0.35],
-        control_guidance_end=[0.35,
-        controlnet_conditioning_scale=[0.
+        control_guidance_start=[0.0, 0.35, 0.35],
+        control_guidance_end=[0.35, 0.95, 0.95],
+        controlnet_conditioning_scale=[0.35,0.45, 0.65],
        cross_attention_kwargs={"ip_adapter_masks": masks}
     ).images[0]
 
 
     image = refiner(
-        prompt=
+        prompt=prompt1,
        generator=generator,
-        num_inference_steps=
+        num_inference_steps=n_steps,
        denoising_start=0.95,
        image=results,
     ).images[0]
@@ -130,10 +176,10 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 #@spaces.GPU #[uncomment to use ZeroGPU]
 
 examples = [
-    "
-    "
-    "
-    "
+    "in a British museum, pavillion, masonry, high-tables and chairs",
+    "in a high ceilinged atrium, glass front, plantwalls, concrete floor, furniture, golden hour",
+    "in a colorful open office environment",
+    " in a Nordic atrium environment"]
 
 css="""
 #col-container {
@@ -152,10 +198,10 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             prompt = gr.Text(
-                label="
+                label="Setting prompt",
                 show_label=False,
                 max_lines=1,
-                placeholder="
+                placeholder="Where do you want to show the Ourhood pod?",
                 container=False,
             )
 
@@ -174,7 +220,7 @@ with gr.Blocks(css=css) as demo:
            )
 
            seed = gr.Slider(
-                label="
+                label="Tracking number (seed)",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
@@ -186,11 +232,11 @@ with gr.Blocks(css=css) as demo:
 
 
            num_inference_steps = gr.Slider(
-                label="
+                label="Detail steps",
                minimum=35,
-                maximum=
+                maximum=75,
                step=1,
-                value=
+                value=50, #Replace with defaults that work for your model
            )
 
            gr.Examples(
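
For reference, a minimal, self-contained sketch of the retrieval step that the new ip_guide helper performs: it scores a pool of pooled embeddings against a prompt embedding with cosine similarity and keeps the five with the smallest scores, in the same ascending ordering used above. The pool size, the (1, 1280) tensor shape, the CPU device, and the top_k/device parameters are illustrative assumptions; in app.py the pool comes from embeds_cases_for_ip.pt and the query from pipe_CN.encode_prompt(prompt).

# Illustrative sketch only -- dummy data, not the Space's real embeddings.
import torch
from torch import nn

def ip_guide(guide, pool, top_k=5, device="cpu"):
    # one cosine-similarity score per pooled embedding in the pool
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    distances = [cos(guide, embed.to(device)) for embed in pool]
    # sort ascending, mirroring the ordering in app.py
    sorted_distances = sorted(enumerate(distances), key=lambda x: x[1])
    lowest_indexes = [index for index, _ in sorted_distances[:top_k]]
    return [pool[i] for i in lowest_indexes], lowest_indexes

# dummy pool of ten pooled embeddings shaped like SDXL pooled prompt embeds
pool = [torch.randn(1, 1280) for _ in range(10)]
query = torch.randn(1, 1280)

closest, indexes = ip_guide(query, pool)
print(indexes)  # indexes of the five selected embeddings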