Spaces:

jallenjia
/

Change-Clothes-AI

Running on Zero

App Files Files Community

jallenjia committed on Nov 18, 2024

Commit

be72bd8

1 Parent(s): e98f05e

加注释

Browse files

Files changed (2) hide show

.gitignore +2 -1
app.py +43 -19

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	- *.pyc


1	+ *.pyc
2	+ .idea

app.py CHANGED Viewed

@@ -124,16 +124,31 @@ pipe = TryonPipeline.from_pretrained(
 pipe.unet_encoder = UNet_Encoder
 @spaces.GPU
-def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_steps,seed,category):
     device = "cuda"
     openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
-    garm_img= garm_img.convert("RGB").resize((768,1024))
     human_img_orig = dict["background"].convert("RGB")
     if is_checked_crop:
         width, height = human_img_orig.size
         target_width = int(min(width, height * (3 / 4)))
@@ -148,38 +163,44 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
     else:
         human_img = human_img_orig.resize((768,1024))
     if is_checked:
         keypoints = openpose_model(human_img.resize((384,512)))
         model_parse, _ = parsing_model(human_img.resize((384,512)))
         mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
         mask = mask.resize((768,1024))
     else:
         mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
-        # mask = transforms.ToTensor()(mask)
-        # mask = mask.unsqueeze(0)
     mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
     mask_gray = to_pil_image((mask_gray+1.0)/2.0)
     human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
     args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
-    # verbosity = getattr(args, "verbosity", None)
     pose_img = args.func(args,human_img_arg)
     pose_img = pose_img[:,:,::-1]
     pose_img = Image.fromarray(pose_img).resize((768,1024))
     with torch.no_grad():
-        # Extract the images
         with torch.cuda.amp.autocast():
             with torch.no_grad():
                 prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
                 negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
                 with torch.inference_mode():
                     (
                         prompt_embeds,
                         negative_prompt_embeds,
@@ -192,6 +213,7 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
                         negative_prompt=negative_prompt,
                     )
                     prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
                     negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
                     if not isinstance(prompt, List):
@@ -211,11 +233,12 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
                             negative_prompt=negative_prompt,
                         )
-                    pose_img =  tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
-                    garm_tensor =  tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
                     generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
                     images = pipe(
                         prompt_embeds=prompt_embeds.to(device,torch.float16),
                         negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
@@ -223,18 +246,19 @@ def start_tryon(dict,garm_img,garment_des,is_checked,is_checked_crop,denoise_ste
                         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
                         num_inference_steps=denoise_steps,
                         generator=generator,
-                        strength = 1.0,
-                        pose_img = pose_img.to(device,torch.float16),
                         text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
-                        cloth = garm_tensor.to(device,torch.float16),
                         mask_image=mask,
                         image=human_img,
                         height=1024,
                         width=768,
-                        ip_adapter_image = garm_img.resize((768,1024)),
                         guidance_scale=2.0,
                     )[0]
     if is_checked_crop:
         out_img = images[0].resize(crop_size)
         human_img_orig.paste(out_img, (int(left), int(top)))

 pipe.unet_encoder = UNet_Encoder
 @spaces.GPU
+def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed, category):
+    """虚拟试衣主函数
+    Args:
+        dict: 输入图像字典，包含背景和图层信息
+        garm_img: 服装图片
+        garment_des: 服装描述文本
+        is_checked: 是否启用自动检测模式
+        is_checked_crop: 是否启用图像裁剪
+        denoise_steps: 去噪步数
+        seed: 随机种子
+        category: 服装类别
+    Returns:
+        生成的试衣结果图像和灰度遮罩
+    """
+    # 1. 初始化和设备设置 - 使用GPU进行处理
     device = "cuda"
     openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
+    # 2. 图像预处理 - 调整服装和人物图像大小
+    garm_img = garm_img.convert("RGB").resize((768,1024))
     human_img_orig = dict["background"].convert("RGB")
+    # 2.1 如果启用裁剪，按3:4比例裁剪人物图像
     if is_checked_crop:
         width, height = human_img_orig.size
         target_width = int(min(width, height * (3 / 4)))
     else:
         human_img = human_img_orig.resize((768,1024))
+    # 3. 生成遮罩
     if is_checked:
+        # 3.1 使用自动检测模式
+        # 使用OpenPose检测人体关键点
         keypoints = openpose_model(human_img.resize((384,512)))
+        # 使用解析模型生成人体部位解析
         model_parse, _ = parsing_model(human_img.resize((384,512)))
+        # 根据类别和关键点生成遮罩
         mask, mask_gray = get_mask_location('hd', category, model_parse, keypoints)
         mask = mask.resize((768,1024))
     else:
+        # 3.2 使用手动提供的遮罩
         mask = pil_to_binary_mask(dict['layers'][0].convert("RGB").resize((768, 1024)))
+    # 3.3 生成灰度遮罩
     mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
     mask_gray = to_pil_image((mask_gray+1.0)/2.0)
+    # 4. 姿态处理
+    # 4.1 调整图像方向并转换格式
     human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
+    # 4.2 使用DensePose生成姿态信息
     args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
     pose_img = args.func(args,human_img_arg)
     pose_img = pose_img[:,:,::-1]
     pose_img = Image.fromarray(pose_img).resize((768,1024))
+    # 5. AI生成过程
     with torch.no_grad():
         with torch.cuda.amp.autocast():
             with torch.no_grad():
+                # 5.1 生成正面提示词嵌入
                 prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), the model is wearing " + garment_des
                 negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
                 with torch.inference_mode():
+                    # 编码提示词
                     (
                         prompt_embeds,
                         negative_prompt_embeds,
                         negative_prompt=negative_prompt,
                     )
+                    # 5.2 生成服装相关的提示词嵌入
                     prompt = "((best quality, masterpiece, ultra-detailed, high quality photography, photo realistic)), a photo of " + garment_des
                     negative_prompt = "monochrome, lowres, bad anatomy, worst quality, normal quality, low quality, blurry, jpeg artifacts, sketch"
                     if not isinstance(prompt, List):
                             negative_prompt=negative_prompt,
                         )
+                    # 5.3 准备输入张量
+                    pose_img = tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
+                    garm_tensor = tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
                     generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
+                    # 6. 使用Stable Diffusion XL管道生成图像
                     images = pipe(
                         prompt_embeds=prompt_embeds.to(device,torch.float16),
                         negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
                         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
                         num_inference_steps=denoise_steps,
                         generator=generator,
+                        strength=1.0,
+                        pose_img=pose_img.to(device,torch.float16),
                         text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
+                        cloth=garm_tensor.to(device,torch.float16),
                         mask_image=mask,
                         image=human_img,
                         height=1024,
                         width=768,
+                        ip_adapter_image=garm_img.resize((768,1024)),
                         guidance_scale=2.0,
                     )[0]
+    # 7. 后处理 - 处理裁剪情况并返回结果
     if is_checked_crop:
         out_img = images[0].resize(crop_size)
         human_img_orig.paste(out_img, (int(left), int(top)))