Tonioesparza committed: Update app.py

app.py CHANGED
@@ -1,21 +1,23 @@
 import gradio as gr
 import numpy as np
 import random
-#import spaces #[uncomment to use ZeroGPU]
+# import spaces #[uncomment to use ZeroGPU]
 import os
 from PIL import Image, ImageDraw, ImageFont
 import torch
 from PIL import Image
 from diffusers.utils import load_image
 from diffusers import DPMSolverSDEScheduler
-from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny,
+from diffusers import StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, AutoencoderTiny, \
+    StableDiffusionXLControlNetPipeline, ControlNetModel
 from diffusers.utils import load_image
 from diffusers.image_processor import IPAdapterMaskProcessor
 from torch import nn

+
 ### auxiliary functions

-def ip_guide(guide, pool):
+def ip_guide(guide, pool, num=3):
     distances = []
     cos = nn.CosineSimilarity(dim=1, eps=1e-6)
     for embed in pool:
@@ -26,7 +28,7 @@ def ip_guide(guide, pool):
     # Sort the list of pairs based on the scores
     sorted_distances = sorted(indexed_distances, key=lambda x: x[1])
     # Extract the indexes of the lowest scores
-    lowest_indexes = [index for index, score in sorted_distances[:
+    lowest_indexes = [index for index, score in sorted_distances[:num]]

     ### return the embeddings with lowest_indexes
     return [pool[i] for i in lowest_indexes], lowest_indexes
@@ -42,9 +44,28 @@ def make_inpaint_condition(image, image_mask):
     image = torch.from_numpy(image)
     return image

-
-
+def find_token_sequence_in_pre_tokenized(input_string, other_string, pipe):
+    # Load the tokenizer
+    tokenizer = pipe.tokenizer
+
+    # Tokenize the input string
+    input_tokens = tokenizer.tokenize(input_string)
+
+    # Tokenize the other string
+
+    pre_tokenized_tokens = tokenizer.tokenize(other_string)
+    # Find matching token sequences and their indexes
+    matching_sequences = []
+    input_length = len(input_tokens)
+    for i in range(len(pre_tokenized_tokens) - input_length + 1):
+        if pre_tokenized_tokens[i:i + input_length] == input_tokens:
+            matching_sequences.append((pre_tokenized_tokens[i:i + input_length], i))

+    return matching_sequences
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_repo_id = "stabilityai/sdxl-turbo"  # Replace with the model you would like to use

 if torch.cuda.is_available():
     torch_dtype = torch.float16
@@ -54,32 +75,40 @@ else:
 processor_mask = IPAdapterMaskProcessor()
 controlnets = [
     ControlNetModel.from_pretrained(
-        "diffusers/controlnet-depth-sdxl-1.0",variant="fp16",use_safetensors=True,torch_dtype=torch.float16
+        "diffusers/controlnet-depth-sdxl-1.0", variant="fp16", use_safetensors=True, torch_dtype=torch.float16
     ),
     ControlNetModel.from_pretrained(
-        "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True,variant="fp16"
+        "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16, use_safetensors=True, variant="fp16"
     ),
 ]

 ###load pipelines

-pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,
+pipe_CN = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V5.0", torch_dtype=torch.float16,
+                                                              controlnet=[controlnets[0], controlnets[0],
+                                                                          controlnets[1]], use_safetensors=True,
+                                                              variant='fp16')
 ###pipe_CN.vae = AutoencoderTiny.from_pretrained("madebyollin/taesdxl", torch_dtype=torch.float16)
-pipe_CN.scheduler=DPMSolverSDEScheduler.from_pretrained("SG161222/RealVisXL_V5.0",subfolder="scheduler",
+pipe_CN.scheduler = DPMSolverSDEScheduler.from_pretrained("SG161222/RealVisXL_V5.0", subfolder="scheduler",
+                                                          use_karras_sigmas=True)
 pipe_CN.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
 pipe_CN.to("cuda")

 ##############################load loras
-
-
-
-pipe_CN.
-pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder
-pipe_CN.
+pipe_CN.load_lora_weights('Tonioesparza/ourhood_training_dreambooth_lora_2_0',
+                          weight_name='pytorch_lora_weights.safetensors')
+#state_dict, network_alphas = StableDiffusionXLControlNetPipeline.lora_state_dict('Tonioesparza/ourhood_training_dreambooth_lora_2_0', weight_name='pytorch_lora_weights.safetensors')
+#pipe_CN.load_lora_into_unet(state_dict, network_alphas, pipe_CN.unet, adapter_name='unet_ourhood')
+#pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, adapter_name='text_ourhood')
+#pipe_CN.load_lora_into_text_encoder(state_dict, network_alphas, pipe_CN.text_encoder, prefix='2', adapter_name='text_2_ourhood')
+#pipe_CN.set_adapters(["unet_ourhood", "text_ourhood", "text_2_ourhood"], adapter_weights=[1.0, 1.0, 1.0])

 pipe_CN.fuse_lora()

-refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
+refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0",
+                                                           text_encoder_2=pipe_CN.text_encoder_2, vae=pipe_CN.vae,
+                                                           torch_dtype=torch.float16, use_safetensors=True,
+                                                           variant="fp16")
 refiner.to("cuda")

 ip_pool = torch.load("./embeds_cases_for_ip.pt")
@@ -89,37 +118,148 @@ pool = list(ip_pool.values())
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024

-
+slingshot = torch.load("./slingshot.pt")

-def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
+def ourhood_inference(prompt=str, num_inference_steps=int, scaffold=int, seed=int, cases_strength=0.5, cases_scope=5):
+    ###pro_encode = pipe_cn.encode_text(prompt) ###ip_images init
+
+    condition = 'both'

     guide = pipe_CN.encode_prompt(prompt)

-    closest,indexes = ip_guide(guide[2],pool)
+    closest, indexes = ip_guide(guide[2], pool, cases_scope)

-
+    ### torch.mean of the indexes

-
+    ip_means = torch.mean(torch.stack([pool[i] for i in indexes]), dim=0)
+
+    print([list(ip_pool.keys())[i] for i in indexes])
+
+    ip_embeds = torch.cat([torch.unsqueeze(torch.zeros_like(closest[0]), 0), torch.unsqueeze(ip_means, 0)], 0).to(
+        dtype=torch.float16, device='cuda')
+
+    pipe_CN.set_ip_adapter_scale([[cases_strength]])

-    pipe_CN.set_ip_adapter_scale([[0.5]])
-
     prompt1 = 'A photograph, of an OurHood privacy booth, with a silken oak frame, hickory stained melange polyester fabric, in ' + prompt

-    ###
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    ### prompt encoding
+
+    text_inputs = pipe_CN.tokenizer(
+        prompt1,
+        padding="max_length",
+        max_length=pipe_CN.tokenizer.model_max_length,
+        truncation=True,
+        return_tensors="pt",
+    )
+
+    text_input_ids = text_inputs.input_ids
+
+    prompt_embeds_1 = pipe_CN.text_encoder(text_input_ids.to('cuda'), output_hidden_states=True)
+
+    prompt_embeds_1 = prompt_embeds_1.hidden_states[-2]
+
+    ###embed prompt encoding 2
+
+    prompt_embeds_2 = pipe_CN.text_encoder_2(text_input_ids.to('cuda'), output_hidden_states=True)
+
+    pooled_prompt_embeds_2 = prompt_embeds_2[0]
+
+    prompt_embeds_2 = prompt_embeds_2.hidden_states[-2]
+
+    #### subtraction
+
+    if condition == 'both':
+
+        matches = find_token_sequence_in_pre_tokenized('ourhood privacy booth', prompt1, pipe_CN)
+
+        items = []
+
+        for match in matches:
+            for w in range(len(match[0])):
+                items.append(match[1] + w)
+
+        for it in items:
+            prompt_embeds_2[0][it] = prompt_embeds_2[0][it] + slingshot['b'].to('cuda')
+
+        pooled_prompt_embeds = pooled_prompt_embeds_2 + slingshot['b'].to('cuda')
+
+    elif condition == 'pooled':
+
+        pooled_prompt_embeds = pooled_prompt_embeds_2 + slingshot['b'].to('cuda')
+
+    elif condition == 'embeds':
+
+        matches = find_token_sequence_in_pre_tokenized('ourhood privacy booth', prompt1, pipe_CN)
+
+        items = []
+
+        for match in matches:
+            for w in range(len(match[0])):
+                items.append(match[1] + w)
+
+        for it in items:
+            prompt_embeds_2[0][it] = prompt_embeds_2[0][it] + slingshot['b'].to('cuda')
+
+    ### concatenation
+
+    prompt_embeds = torch.cat([prompt_embeds_1, prompt_embeds_2], dim=-1)
+
+    ### create negative embeds text encoder 1
+
+    negative_prompt = "deformed, ugly, wrong proportion, low res, worst quality, low quality,text,watermark"
+
+    max_length = prompt_embeds.shape[1]
+
+    uncond_input = pipe_CN.tokenizer(
+        negative_prompt,
+        padding="max_length",
+        max_length=max_length,
+        truncation=True,
+        return_tensors="pt",
+    )
+
+    uncond_input_ids = uncond_input.input_ids
+
+    negative_prompt_embeds_1 = pipe_CN.text_encoder(
+        uncond_input_ids.to('cuda'),
+        output_hidden_states=True,
+    )
+
+    negative_prompt_embeds_1 = negative_prompt_embeds_1.hidden_states[-2]
+
+    ### create negative embeds text encoder 2
+
+    negative_prompt_embeds_2 = pipe_CN.text_encoder_2(
+        uncond_input_ids.to('cuda'),
+        output_hidden_states=True,
+    )
+
+    negative_pooled_prompt_embeds = negative_prompt_embeds_2[0]
+
+    negative_prompt_embeds_2 = negative_prompt_embeds_2.hidden_states[-2]
+
+    ### negative concatenation
+
+    negative_prompt_embeds = torch.cat([negative_prompt_embeds_1, negative_prompt_embeds_2], dim=-1)
+
+    ### function has no formats defined
+
+    scaff_dic = {1: {
+        'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_square_2.png",
+        'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_square_2.png",
+        'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_noroof_square.png",
+        'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_depth_solo_square.png"},
+        2: {
+            'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_C.png",
+            'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_C.png",
+            'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_C.png",
+            'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_C_solo.png"},
+        3: {
+            'mask1': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_in_B.png",
+            'mask2': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/mask_out_B.png",
+            'depth_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/depth_B.png",
+            'canny_image': "https://huggingface.co/Tonioesparza/ourhood_training_dreambooth_lora_2_0/resolve/main/canny_B_solo.png"}}
+    ### mask init

     output_height = 1024
     output_width = 1024
@@ -130,38 +270,38 @@ def ourhood_inference(prompt=str,num_inference_steps=int,scaffold=int,seed=int):
     masks = processor_mask.preprocess([mask1], height=output_height, width=output_width)
     masks = [masks.reshape(1, masks.shape[0], masks.shape[2], masks.shape[3])]

-    ###precomputed depth image
+    ###precomputed depth image

     depth_image = load_image(scaff_dic[scaffold]['depth_image'])
     canny_image = load_image(scaff_dic[scaffold]['canny_image'])
-    masked_depth=make_inpaint_condition(depth_image,mask2)
+    masked_depth = make_inpaint_condition(depth_image, mask2)

     images_CN = [depth_image, canny_image]

-
-    ### inference
+    ### inference

     n_steps = num_inference_steps

     generator = torch.Generator(device="cuda").manual_seed(seed)

     results = pipe_CN(
-
-
-
+        prompt_embeds=prompt_embeds,
+        negative_prompt_embeds=negative_prompt_embeds,
+        pooled_prompt_embeds=pooled_prompt_embeds_2,
+        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+        ip_adapter_image_embeds=[ip_embeds],
         generator=generator,
         num_inference_steps=n_steps,
         num_images_per_prompt=1,
         denoising_end=0.95,
-        image=[depth_image,masked_depth,canny_image],
+        image=[depth_image, masked_depth, canny_image],
         output_type="latent",
         control_guidance_start=[0.0, 0.35, 0.35],
         control_guidance_end=[0.35, 0.95, 0.95],
-        controlnet_conditioning_scale=[0.35,0.95, 0.95],
+        controlnet_conditioning_scale=[0.35, 0.95, 0.95],
         cross_attention_kwargs={"ip_adapter_masks": masks}
     ).images[0]

-
     image = refiner(
         prompt=prompt1,
         generator=generator,
@@ -227,6 +367,22 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
+
+            cases_strength = gr.Slider(
+                label="Brand strength",
+                minimum=0.0,
+                maximum=1.0,
+                step=0.05,
+                value=0.5,
+            )
+
+            cases_scope = gr.Slider(
+                label="Brand scope",
+                minimum=1,
+                maximum=10,
+                step=1,
+                value=1,
+            )


             with gr.Row():
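
Note on the new `cases_scope` parameter: `ip_guide` now keeps the `num` pool embeddings closest to the prompt embedding instead of a hard-coded count. The middle of the function falls outside the visible hunks, so the exact distance it appends is not shown; the following is a minimal self-contained sketch of the ranking it performs, assuming the score is 1 minus cosine similarity and using illustrative tensor shapes in place of the real embeds_cases_for_ip.pt contents:

import torch
from torch import nn

# Stand-ins for the prompt embedding and the stored IP-Adapter pool;
# real tensors come from pipe_CN.encode_prompt and embeds_cases_for_ip.pt.
guide = torch.randn(1, 1280)
pool = [torch.randn(1, 1280) for _ in range(8)]

# Assumed scoring: 1 - cosine similarity, so the lowest score is the closest match.
cos = nn.CosineSimilarity(dim=1, eps=1e-6)
distances = [float(1 - cos(guide, embed)) for embed in pool]
indexed_distances = list(enumerate(distances))
sorted_distances = sorted(indexed_distances, key=lambda x: x[1])
lowest_indexes = [index for index, score in sorted_distances[:3]]
closest = [pool[i] for i in lowest_indexes]
print(lowest_indexes)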
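
The "slingshot" step is the heart of this commit: a stored offset vector is added to the text-encoder-2 hidden states at the token positions that spell the brand phrase, and to the pooled embedding. A toy version with made-up shapes (the real offset is `slingshot['b']` from slingshot.pt; a broadcastable 1280-dim vector is assumed here, matching SDXL's second text encoder):

import torch

# Illustrative shapes only: 77 tokens, encoder-2 hidden size 1280.
seq_len, dim = 77, 1280
prompt_embeds_2 = torch.randn(1, seq_len, dim)
pooled_prompt_embeds_2 = torch.randn(1, dim)
b = torch.randn(dim)   # stand-in for slingshot['b']
items = [3, 4, 5]      # token positions found by find_token_sequence_in_pre_tokenized

# Shift the matched token states and the pooled embedding by the offset.
for it in items:
    prompt_embeds_2[0][it] = prompt_embeds_2[0][it] + b
pooled_prompt_embeds = pooled_prompt_embeds_2 + b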
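
The visible hunks end before the event wiring, so how the two new sliders reach `ourhood_inference` is not shown. A runnable sketch of the plausible wiring, with a stub in place of the real inference function and with every component name other than `cases_strength` and `cases_scope` assumed:

import gradio as gr

def fake_inference(prompt, steps, scaffold, seed, strength, scope):
    # Stub standing in for ourhood_inference so the wiring runs without the models.
    return f"{prompt} | steps={steps} scaffold={scaffold} seed={seed} strength={strength} scope={scope}"

with gr.Blocks() as demo:
    prompt = gr.Text(label="Prompt")
    num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=50, step=1, value=25)
    scaffold = gr.Slider(label="Perspective", minimum=1, maximum=3, step=1, value=1)
    seed = gr.Slider(label="Seed", minimum=0, maximum=100, step=1, value=0)
    cases_strength = gr.Slider(label="Brand strength", minimum=0.0, maximum=1.0, step=0.05, value=0.5)
    cases_scope = gr.Slider(label="Brand scope", minimum=1, maximum=10, step=1, value=1)
    result = gr.Text(label="Result")
    run_button = gr.Button("Run")
    run_button.click(fn=fake_inference,
                     inputs=[prompt, num_inference_steps, scaffold, seed, cases_strength, cases_scope],
                     outputs=[result])

demo.launch()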