Tonioesparza committed on
Commit 64ff68e · verified · 1 Parent(s): b8d63eb

Update app.py

Files changed (1)
  1. app.py  +78 -32
app.py CHANGED
@@ -10,10 +10,41 @@ from diffusers.utils import load_image
 from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, StableDiffusionXLControlNetPipeline, ControlNetModel
 from diffusers.utils import load_image
 from diffusers.image_processor import IPAdapterMaskProcessor
+from torch import nn
+
+### auxiliary functions
+
+def ip_guide(guide, pool):
+    distances = []
+    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
+    for embed in pool:
+        dist = cos(guide, embed.to('cuda'))
+        distances.append(dist)
+    ### find the indexes of the top 5 embeddings
+    indexed_distances = list(enumerate(distances))
+    # Sort the list of pairs based on the scores
+    sorted_distances = sorted(indexed_distances, key=lambda x: x[1])
+    # Extract the indexes of the lowest scores
+    lowest_indexes = [index for index, score in sorted_distances[:5]]
+
+    ### return the embeddings with lowest_indexes
+    return [pool[i] for i in lowest_indexes], lowest_indexes
+
+
+def make_inpaint_condition(image, image_mask):
+    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
+    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
+
+    assert image.shape[0:1] == image_mask.shape[0:1]
+    image[image_mask > 0.5] = -1.0  # set as masked pixel
+    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
+    image = torch.from_numpy(image)
+    return image
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
 
+
 if torch.cuda.is_available():
     torch_dtype = torch.float16
 else:
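Note: the two helpers added at the top of the file drive the rest of the change. ip_guide scores a pool of precomputed IP-Adapter image embeddings against a prompt embedding with cosine similarity and returns five of them together with their indexes; make_inpaint_condition is the usual inpaint-conditioning helper that normalizes an image to a (1, 3, H, W) tensor and marks masked pixels with -1.0. A minimal sketch of how they behave with dummy data (shapes are illustrative only; it assumes the helpers above are in scope and that CUDA is available, since ip_guide moves each pool embedding to 'cuda'):

    import torch
    from PIL import Image

    # ip_guide: rank a pool of image embeddings against a prompt embedding.
    guide = torch.randn(1, 1280, device="cuda")      # stand-in for a pooled prompt embedding
    pool = [torch.randn(1, 1280) for _ in range(8)]  # stand-in for the precomputed pool
    chosen, indexes = ip_guide(guide, pool)
    print(len(chosen), indexes)                      # 5 embeddings and their positions in the pool

    # make_inpaint_condition: normalized (1, 3, H, W) tensor, masked pixels set to -1.0.
    image = Image.new("RGB", (64, 64), "white")
    mask = Image.new("L", (64, 64), 0)
    cond = make_inpaint_condition(image, mask)
    print(cond.shape)                                # torch.Size([1, 3, 64, 64])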
@@ -31,7 +62,7 @@ controlnets = [
 
 ###load pipelines
 
-pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=controlnets, use_safetensors=True, variant='fp16')
+pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,controlnet=[controlnets[0],controlnets[0],controlnets[1]], use_safetensors=True, variant='fp16')
 pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
 pipe_CN.scheduler=DPMSolverMultistepScheduler.from_pretrained("SG161222/RealVisXL_V5.0",subfolder="scheduler",use_karras_sigmas=True)
 pipe_CN.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
@@ -39,31 +70,52 @@ pipe_CN.to("cuda")
 
 ##############################load loras
 
-pipe_CN.load_lora_weights('Tonioesparza/ourhood_training_dreambooth_lora_2_0', weight_name='pytorch_lora_weights.safetensors',adapter_name='ourhood')
-###pipe_CN.set_adapters(['ourhood'],[0.98])
+state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict('Tonioesparza/ourhood_training_dreambooth_lora_2_0', weight_name='pytorch_lora_weights.safetensors')
+pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_ourhood')
+pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder ,prefix='2', adapter_name='text_2_ourhood')
+pipe_CN.set_adapters(["unet_ourhood","text_ourhood","text_2_ourhood"], adapter_weights=[1.0, 1.0,1.0])
+
 pipe_CN.fuse_lora()
 
 refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",text_encoder_2=pipe_CN.text_encoder_2,vae=pipe_CN.vae,torch_dtype=torch.float16,use_safetensors=True,variant="fp16")
 refiner.to("cuda")
 
+ip_pool = torch.load("https://huggingface.co/spaces/Tonioesparza/hb8_ourhood_pilot/resolve/main/embeds_cases_for_ip.pt")
 
+pool = list(ip_pool.values())
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
 def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 
-    ###pro_encode = pipe_cn.encode_text(prompt)
+    ###pro_encode = pipe_cn.encode_text(prompt) ###ip_images init
+
+    guide = pipe_CN.encode_prompt(prompt)
+
+    closest,indexes = ip_guide(guide[2],pool)
+
+    print( [list(ip_pool.keys())[i] for i in indexes])
+
+    ip_embeds = torch.cat([torch.unsqueeze(torch.zeros_like(closest[0]),0),torch.unsqueeze(closest[0],0)],0).to(dtype=torch.float16,device='cuda')
+
+    pipe_CN.set_ip_adapter_scale([[0.5]])
+
+    prompt1 = 'A frontpage photograph of an ourhood privacy booth, silken oak frame, taupe exterior fabric, taupe interior fabric, in ' + prompt
 
     ### function has no formats defined
 
     scaff_dic={1:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_square_2.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_square_2.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_noroof_square.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_solo_square.png"},
                2:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_C.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_C.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_C.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_C_solo.png"},
                3:{'mask1':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_B.png",
+                  'mask2':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_B.png",
                   'depth_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_B.png",
                   'canny_image':"https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_B_solo.png"}}
     ### mask init
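Note on this hunk: ip_pool is loaded with torch.load pointed at an https URL, but torch.load expects a local file path or a file-like object, so the embeddings file generally has to be fetched first. A minimal sketch of one way to do that (assuming, as the URL suggests, that embeds_cases_for_ip.pt sits in the Tonioesparza/hb8_ourhood_pilot Space repo):

    import torch
    from huggingface_hub import hf_hub_download

    # Fetch the precomputed IP-Adapter embeddings from the Space repo, then load them.
    pt_path = hf_hub_download(
        repo_id="Tonioesparza/hb8_ourhood_pilot",
        filename="embeds_cases_for_ip.pt",
        repo_type="space",
    )
    ip_pool = torch.load(pt_path)   # dict mapping case names to embedding tensors
    pool = list(ip_pool.values())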
@@ -72,53 +124,47 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
     output_width = 1024
 
     mask1 = load_image(scaff_dic[scaffold]['mask1'])
+    mask2 = load_image(scaff_dic[scaffold]['mask2'])
 
     masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
     masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]
 
-    ###ip_images init
-
-    ip_img_1 = load_image("https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/25hours-hotel_25h_IndreBy_StephanLemke_Sauna1-1024x768.png")
-
-    ip_images = [[ip_img_1]]
-    pipe_CN.set_ip_adapter_scale([[0.5]])
-
-    n_steps = num_inference_steps
-
     ###precomputed depth image
 
     depth_image = load_image(scaff_dic[scaffold]['depth_image'])
     canny_image = load_image(scaff_dic[scaffold]['canny_image'])
+    masked_depth=make_inpaint_condition(depth_image,mask2)
 
     images_CN = [depth_image, canny_image]
 
-
 
     ### inference
 
+    n_steps = num_inference_steps
+
     generator = torch.Generator(device="cuda").manual_seed(seed)
 
     results = pipe_CN(
-        prompt=prompt,
-        ip_adapter_image=ip_images,
+        prompt=prompt1,
+        ip_adapter_image_embeds = [ip_embeds],
        negative_prompt="deformed, ugly, wrong proportion, low res, worst quality, low quality,text,watermark",
        generator=generator,
        num_inference_steps=n_steps,
        num_images_per_prompt=1,
        denoising_end=0.95,
-        image=images_CN,
+        image=[depth_image,masked_depth,canny_image],
        output_type="latent",
-        control_guidance_start=[0.0, 0.35],
-        control_guidance_end=[0.35, 1.0],
-        controlnet_conditioning_scale=[0.5, 1.0],
+        control_guidance_start=[0.0, 0.35, 0.35],
+        control_guidance_end=[0.35, 0.95, 0.95],
+        controlnet_conditioning_scale=[0.35,0.45, 0.65],
        cross_attention_kwargs={"ip_adapter_masks": masks}
     ).images[0]
 
 
     image = refiner(
-        prompt=prompt,
+        prompt=prompt1,
        generator=generator,
-        num_inference_steps=num_inference_steps,
+        num_inference_steps=n_steps,
        denoising_start=0.95,
        image=results,
     ).images[0]
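Note on the new IP-Adapter path: recent diffusers releases expect each tensor passed via ip_adapter_image_embeds to stack the negative image embedding ahead of the positive one when classifier-free guidance is active, which is what the torch.cat of a zeros tensor with the selected pool embedding builds earlier in the function; the refiner call is otherwise unchanged apart from reusing prompt1 and n_steps, keeping the base-to-refiner latent handoff at denoising_end / denoising_start = 0.95. A minimal sketch of that embedding construction with a dummy tensor (the 1280-dim shape is illustrative, not taken from the commit):

    import torch

    chosen = torch.randn(1, 1280)  # stand-in for one embedding picked from the pool by ip_guide()

    # Negative (all-zeros) embedding first, positive embedding second,
    # one list entry per loaded IP-Adapter.
    ip_embeds = torch.cat(
        [torch.zeros_like(chosen).unsqueeze(0), chosen.unsqueeze(0)], dim=0
    )
    print(ip_embeds.shape)  # torch.Size([2, 1, 1280])
    # results = pipe_CN(..., ip_adapter_image_embeds=[ip_embeds], ...)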
@@ -130,10 +176,10 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
 #@spaces.GPU #[uncomment to use ZeroGPU]
 
 examples = [
-    "A photograph, of an Ourhood privacy booth, front view, in a warehouse eventspace environment, in the style of event photography, silken oak frame, checkered warm grey exterior fabric, checkered warm grey interior fabric, curtains, diner seating, pillows",
-    "A photograph, of an Ourhood privacy booth, side view, in a warehouse eventspace environment, in the style of event photography, silken oak frame, taupe exterior fabric",
-    "A photograph, of an Ourhood privacy booth, close-up, in a HolmrisB8_HQ office environment, in the style of makeshift photoshoot, silken oak frame, taupe exterior fabric, taupe interior fabric, pillows",
-    "A rendering, of an Ourhood privacy booth, front view, in a Nordic atrium environment, in the style of Keyshot, silken oak frame, taupe exterior fabric, taupe interior fabric, diner seating"]
+    "in a British museum, pavillion, masonry, high-tables and chairs",
+    "in a high ceilinged atrium, glass front, plantwalls, concrete floor, furniture, golden hour",
+    "in a colorful open office environment",
+    " in a Nordic atrium environment"]
 
 css="""
 #col-container {
@@ -152,10 +198,10 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
 
             prompt = gr.Text(
-                label="Prompt",
+                label="Setting prompt",
                 show_label=False,
                 max_lines=1,
-                placeholder="Enter your prompt",
+                placeholder="Where do you want to show the Ourhood pod?",
                 container=False,
             )
 
@@ -174,7 +220,7 @@ with gr.Blocks(css=css) as demo:
             )
 
             seed = gr.Slider(
-                label="tracking number (seed)",
+                label="Tracking number (seed)",
                 minimum=0,
                 maximum=MAX_SEED,
                 step=1,
@@ -186,11 +232,11 @@ with gr.Blocks(css=css) as demo:
 
 
             num_inference_steps = gr.Slider(
-                label="Number of inference steps",
+                label="Detail steps",
                 minimum=35,
-                maximum=50,
+                maximum=75,
                 step=1,
-                value=35, #Replace with defaults that work for your model
+                value=50, #Replace with defaults that work for your model
             )
 
         gr.Examples(
 