Spaces:
Running
on
Zero
Running
on
Zero
加注释 (Add comments)
Browse files
- .gitignore +2 -1
- app.py +43 -19
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
*.pyc
|
|
|
|
1 |
+
*.pyc
|
2 |
+
.idea
|
app.py
CHANGED
@@ -124,16 +124,31 @@ pipe = TryonPipeline.from_pretrained(
|
|
124 |
pipe.unet_encoder = UNet_Encoder
|
125 |
|
126 |
@spaces.GPU
|
127 |
-
def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_steps,seed,category):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
device = "cuda"
|
129 |
-
|
130 |
openpose_model.preprocessor.body_estimation.model.to(device)
|
131 |
pipe.to(device)
|
132 |
pipe.unet_encoder.to(device)
|
133 |
|
134 |
-
|
|
|
135 |
human_img_orig = dict["background"].convert("RGB")
|
136 |
|
|
|
137 |
if is_checked_crop:
|
138 |
width, height = human_img_orig.size
|
139 |
target_width = int(min(width, height * (3 / 4)))
|
@@ -148,38 +163,44 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
148 |
else:
|
149 |
human_img = human_img_orig.resize((768,1024))
|
150 |
|
151 |
-
|
152 |
if is_checked:
|
|
|
|
|
153 |
keypoints = openpose_model(human_img.resize((384,512)))
|
|
|
154 |
model_parse, _ = parsing_model(human_img.resize((384,512)))
|
|
|
155 |
mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
|
156 |
mask = mask.resize((768,1024))
|
157 |
else:
|
|
|
158 |
mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
|
159 |
-
|
160 |
-
|
161 |
mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
|
162 |
mask_gray = to_pil_image((mask_gray+1.0)/2.0)
|
163 |
|
164 |
-
|
|
|
165 |
human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
|
166 |
human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
|
167 |
|
168 |
-
|
169 |
-
|
170 |
args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
|
171 |
-
# verbosity = getattr(args, "verbosity", None)
|
172 |
pose_img = args.func(args,human_img_arg)
|
173 |
pose_img = pose_img[:,:,::-1]
|
174 |
pose_img = Image.fromarray(pose_img).resize((768,1024))
|
175 |
|
|
|
176 |
with torch.no_grad():
|
177 |
-
# Extract the images
|
178 |
with torch.cuda.amp.autocast():
|
179 |
with torch.no_grad():
|
|
|
180 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
|
181 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
182 |
with torch.inference_mode():
|
|
|
183 |
(
|
184 |
prompt_embeds,
|
185 |
negative_prompt_embeds,
|
@@ -192,6 +213,7 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
192 |
negative_prompt=negative_prompt,
|
193 |
)
|
194 |
|
|
|
195 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
|
196 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
197 |
if not isinstance(prompt, List):
|
@@ -211,11 +233,12 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
211 |
negative_prompt=negative_prompt,
|
212 |
)
|
213 |
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
|
218 |
generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
|
|
|
|
|
219 |
images = pipe(
|
220 |
prompt_embeds=prompt_embeds.to(device,torch.float16),
|
221 |
negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
|
@@ -223,18 +246,19 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
|
|
223 |
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
|
224 |
num_inference_steps=denoise_steps,
|
225 |
generator=generator,
|
226 |
-
strength = 1.0,
|
227 |
-
pose_img = pose_img.to(device,torch.float16),
|
228 |
text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
|
229 |
-
cloth = garm_tensor.to(device,torch.float16),
|
230 |
mask_image=mask,
|
231 |
image=human_img,
|
232 |
height=1024,
|
233 |
width=768,
|
234 |
-
ip_adapter_image = garm_img.resize((768,1024)),
|
235 |
guidance_scale=2.0,
|
236 |
)[0]
|
237 |
|
|
|
238 |
if is_checked_crop:
|
239 |
out_img = images[0].resize(crop_size)
|
240 |
human_img_orig.paste(out_img, (int(left), int(top)))
|
|
|
124 |
pipe.unet_encoder = UNet_Encoder
|
125 |
|
126 |
@spaces.GPU
|
127 |
+
def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed, category):
|
128 |
+
"""虚拟试衣主函数
|
129 |
+
Args:
|
130 |
+
dict: 输入图像字典,包含背景和图层信息
|
131 |
+
garm_img: 服装图片
|
132 |
+
garment_des: 服装描述文本
|
133 |
+
is_checked: 是否启用自动检测模式
|
134 |
+
is_checked_crop: 是否启用图像裁剪
|
135 |
+
denoise_steps: 去噪步数
|
136 |
+
seed: 随机种子
|
137 |
+
category: 服装类别
|
138 |
+
Returns:
|
139 |
+
生成的试衣结果图像和灰度遮罩
|
140 |
+
"""
|
141 |
+
# 1. 初始化和设备设置 - 使用GPU进行处理
|
142 |
device = "cuda"
|
|
|
143 |
openpose_model.preprocessor.body_estimation.model.to(device)
|
144 |
pipe.to(device)
|
145 |
pipe.unet_encoder.to(device)
|
146 |
|
147 |
+
# 2. 图像预处理 - 调整服装和人物图像大小
|
148 |
+
garm_img = garm_img.convert("RGB").resize((768,1024))
|
149 |
human_img_orig = dict["background"].convert("RGB")
|
150 |
|
151 |
+
# 2.1 如果启用裁剪,按3:4比例裁剪人物图像
|
152 |
if is_checked_crop:
|
153 |
width, height = human_img_orig.size
|
154 |
target_width = int(min(width, height * (3 / 4)))
|
|
|
163 |
else:
|
164 |
human_img = human_img_orig.resize((768,1024))
|
165 |
|
166 |
+
# 3. 生成遮罩
|
167 |
if is_checked:
|
168 |
+
# 3.1 使用自动检测模式
|
169 |
+
# 使用OpenPose检测人体关键点
|
170 |
keypoints = openpose_model(human_img.resize((384,512)))
|
171 |
+
# 使用解析模型生成人体部位解析
|
172 |
model_parse, _ = parsing_model(human_img.resize((384,512)))
|
173 |
+
# 根据类别和关键点生成遮罩
|
174 |
mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
|
175 |
mask = mask.resize((768,1024))
|
176 |
else:
|
177 |
+
# 3.2 使用手动提供的遮罩
|
178 |
mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
|
179 |
+
|
180 |
+
# 3.3 生成灰度遮罩
|
181 |
mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
|
182 |
mask_gray = to_pil_image((mask_gray+1.0)/2.0)
|
183 |
|
184 |
+
# 4. 姿态处理
|
185 |
+
# 4.1 调整图像方向并转换格式
|
186 |
human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
|
187 |
human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
|
188 |
|
189 |
+
# 4.2 使用DensePose生成姿态信息
|
|
|
190 |
args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
|
|
|
191 |
pose_img = args.func(args,human_img_arg)
|
192 |
pose_img = pose_img[:,:,::-1]
|
193 |
pose_img = Image.fromarray(pose_img).resize((768,1024))
|
194 |
|
195 |
+
# 5. AI生成过程
|
196 |
with torch.no_grad():
|
|
|
197 |
with torch.cuda.amp.autocast():
|
198 |
with torch.no_grad():
|
199 |
+
# 5.1 生成正面提示词嵌入
|
200 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
|
201 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
202 |
with torch.inference_mode():
|
203 |
+
# 编码提示词
|
204 |
(
|
205 |
prompt_embeds,
|
206 |
negative_prompt_embeds,
|
|
|
213 |
negative_prompt=negative_prompt,
|
214 |
)
|
215 |
|
216 |
+
# 5.2 生成服装相关的提示词嵌入
|
217 |
prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
|
218 |
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
|
219 |
if not isinstance(prompt, List):
|
|
|
233 |
negative_prompt=negative_prompt,
|
234 |
)
|
235 |
|
236 |
+
# 5.3 准备输入张量
|
237 |
+
pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
|
238 |
+
garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
|
|
|
239 |
generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
|
240 |
+
|
241 |
+
# 6. 使用Stable Diffusion XL管道生成图像
|
242 |
images = pipe(
|
243 |
prompt_embeds=prompt_embeds.to(device,torch.float16),
|
244 |
negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
|
|
|
246 |
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
|
247 |
num_inference_steps=denoise_steps,
|
248 |
generator=generator,
|
249 |
+
strength=1.0,
|
250 |
+
pose_img=pose_img.to(device,torch.float16),
|
251 |
text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
|
252 |
+
cloth=garm_tensor.to(device,torch.float16),
|
253 |
mask_image=mask,
|
254 |
image=human_img,
|
255 |
height=1024,
|
256 |
width=768,
|
257 |
+
ip_adapter_image=garm_img.resize((768,1024)),
|
258 |
guidance_scale=2.0,
|
259 |
)[0]
|
260 |
|
261 |
+
# 7. 后处理 - 处理裁剪情况并返回结果
|
262 |
if is_checked_crop:
|
263 |
out_img = images[0].resize(crop_size)
|
264 |
human_img_orig.paste(out_img, (int(left), int(top)))
|