jallenjia committed on
Commit
be72bd8
·
1 Parent(s): e98f05e
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app.py +43 -19
.gitignore CHANGED
@@ -1 +1,2 @@
1
- *.pyc
 
 
1
+ *.pyc
2
+ .idea
app.py CHANGED
@@ -124,16 +124,31 @@ pipe = TryonPipeline.from_pretrained(
124
  pipe.unet_encoder = UNet_Encoder
125
 
126
  @spaces.GPU
127
- def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_steps,seed,category):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  device = "cuda"
129
-
130
  openpose_model.preprocessor.body_estimation.model.to(device)
131
  pipe.to(device)
132
  pipe.unet_encoder.to(device)
133
 
134
- garm_img= garm_img.convert("RGB").resize((768,1024))
 
135
  human_img_orig = dict["background"].convert("RGB")
136
 
 
137
  if is_checked_crop:
138
  width, height = human_img_orig.size
139
  target_width = int(min(width, height * (3 / 4)))
@@ -148,38 +163,44 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
148
  else:
149
  human_img = human_img_orig.resize((768,1024))
150
 
151
-
152
  if is_checked:
 
 
153
  keypoints = openpose_model(human_img.resize((384,512)))
 
154
  model_parse, _ = parsing_model(human_img.resize((384,512)))
 
155
  mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
156
  mask = mask.resize((768,1024))
157
  else:
 
158
  mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
159
- # mask = transforms.ToTensor()(mask)
160
- # mask = mask.unsqueeze(0)
161
  mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
162
  mask_gray = to_pil_image((mask_gray+1.0)/2.0)
163
 
164
-
 
165
  human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
166
  human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
167
 
168
-
169
-
170
  args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
171
- # verbosity = getattr(args, "verbosity", None)
172
  pose_img = args.func(args,human_img_arg)
173
  pose_img = pose_img[:,:,::-1]
174
  pose_img = Image.fromarray(pose_img).resize((768,1024))
175
 
 
176
  with torch.no_grad():
177
- # Extract the images
178
  with torch.cuda.amp.autocast():
179
  with torch.no_grad():
 
180
  prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
181
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
182
  with torch.inference_mode():
 
183
  (
184
  prompt_embeds,
185
  negative_prompt_embeds,
@@ -192,6 +213,7 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
192
  negative_prompt=negative_prompt,
193
  )
194
 
 
195
  prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
196
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
197
  if not isinstance(prompt, List):
@@ -211,11 +233,12 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
211
  negative_prompt=negative_prompt,
212
  )
213
 
214
-
215
-
216
- pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
217
- garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
218
  generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
 
 
219
  images = pipe(
220
  prompt_embeds=prompt_embeds.to(device,torch.float16),
221
  negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
@@ -223,18 +246,19 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
223
  negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
224
  num_inference_steps=denoise_steps,
225
  generator=generator,
226
- strength = 1.0,
227
- pose_img = pose_img.to(device,torch.float16),
228
  text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
229
- cloth = garm_tensor.to(device,torch.float16),
230
  mask_image=mask,
231
  image=human_img,
232
  height=1024,
233
  width=768,
234
- ip_adapter_image = garm_img.resize((768,1024)),
235
  guidance_scale=2.0,
236
  )[0]
237
 
 
238
  if is_checked_crop:
239
  out_img = images[0].resize(crop_size)
240
  human_img_orig.paste(out_img, (int(left), int(top)))
 
124
  pipe.unet_encoder = UNet_Encoder
125
 
126
  @spaces.GPU
127
+ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed, category):
128
+ """虚拟试衣主函数
129
+ Args:
130
+ dict: 输入图像字典,包含背景和图层信息
131
+ garm_img: 服装图片
132
+ garment_des: 服装描述文本
133
+ is_checked: 是否启用自动检测模式
134
+ is_checked_crop: 是否启用图像裁剪
135
+ denoise_steps: 去噪步数
136
+ seed: 随机种子
137
+ category: 服装类别
138
+ Returns:
139
+ 生成的试衣结果图像和灰度遮罩
140
+ """
141
+ # 1. 初始化和设备设置 - 使用GPU进行处理
142
  device = "cuda"
 
143
  openpose_model.preprocessor.body_estimation.model.to(device)
144
  pipe.to(device)
145
  pipe.unet_encoder.to(device)
146
 
147
+ # 2. 图像预处理 - 调整服装和人物图像大小
148
+ garm_img = garm_img.convert("RGB").resize((768,1024))
149
  human_img_orig = dict["background"].convert("RGB")
150
 
151
+ # 2.1 如果启用裁剪,按3:4比例裁剪人物图像
152
  if is_checked_crop:
153
  width, height = human_img_orig.size
154
  target_width = int(min(width, height * (3 / 4)))
 
163
  else:
164
  human_img = human_img_orig.resize((768,1024))
165
 
166
+ # 3. 生成遮罩
167
  if is_checked:
168
+ # 3.1 使用自动检测模式
169
+ # 使用OpenPose检测人体关键点
170
  keypoints = openpose_model(human_img.resize((384,512)))
171
+ # 使用解析模型生成人体部位解析
172
  model_parse, _ = parsing_model(human_img.resize((384,512)))
173
+ # 根据类别和关键点生成遮罩
174
  mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
175
  mask = mask.resize((768,1024))
176
  else:
177
+ # 3.2 使用手动提供的遮罩
178
  mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
179
+
180
+ # 3.3 生成灰度遮罩
181
  mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
182
  mask_gray = to_pil_image((mask_gray+1.0)/2.0)
183
 
184
+ # 4. 姿态处理
185
+ # 4.1 调整图像方向并转换格式
186
  human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
187
  human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
188
 
189
+ # 4.2 使用DensePose生成姿态信息
 
190
  args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
 
191
  pose_img = args.func(args,human_img_arg)
192
  pose_img = pose_img[:,:,::-1]
193
  pose_img = Image.fromarray(pose_img).resize((768,1024))
194
 
195
+ # 5. AI生成过程
196
  with torch.no_grad():
 
197
  with torch.cuda.amp.autocast():
198
  with torch.no_grad():
199
+ # 5.1 生成正面提示词嵌入
200
  prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
201
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
202
  with torch.inference_mode():
203
+ # 编码提示词
204
  (
205
  prompt_embeds,
206
  negative_prompt_embeds,
 
213
  negative_prompt=negative_prompt,
214
  )
215
 
216
+ # 5.2 生成服装相关的提示词嵌入
217
  prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
218
  negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
219
  if not isinstance(prompt, List):
 
233
  negative_prompt=negative_prompt,
234
  )
235
 
236
+ # 5.3 准备输入张量
237
+ pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
238
+ garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
 
239
  generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
240
+
241
+ # 6. 使用Stable Diffusion XL管道生成图像
242
  images = pipe(
243
  prompt_embeds=prompt_embeds.to(device,torch.float16),
244
  negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
 
246
  negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
247
  num_inference_steps=denoise_steps,
248
  generator=generator,
249
+ strength=1.0,
250
+ pose_img=pose_img.to(device,torch.float16),
251
  text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
252
+ cloth=garm_tensor.to(device,torch.float16),
253
  mask_image=mask,
254
  image=human_img,
255
  height=1024,
256
  width=768,
257
+ ip_adapter_image=garm_img.resize((768,1024)),
258
  guidance_scale=2.0,
259
  )[0]
260
 
261
+ # 7. 后处理 - 处理裁剪情况并返回结果
262
  if is_checked_crop:
263
  out_img = images[0].resize(crop_size)
264
  human_img_orig.paste(out_img, (int(left), int(top)))