Spaces:
Runtime error
Runtime error
adaface-neurips
committed on
Commit
•
3558684
1
Parent(s):
d08dbd0
update app.py, add guidance scale slider
Browse files- app.py +15 -5
- lib/pipline_ConsistentID.py +2 -3
app.py
CHANGED
@@ -22,7 +22,7 @@ device = f"cuda:{args.gpu}"
|
|
22 |
pipe = ConsistentIDPipeline.from_pretrained(
|
23 |
args.base_model_path,
|
24 |
torch_dtype=torch.float16,
|
25 |
-
)
|
26 |
|
27 |
### Load consistentID_model checkpoint
|
28 |
pipe.load_ConsistentID_model(
|
@@ -30,11 +30,12 @@ pipe.load_ConsistentID_model(
|
|
30 |
bise_net_weight_path="./models/BiSeNet_pretrained_for_ConsistentID.pth",
|
31 |
)
|
32 |
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
|
|
33 |
|
34 |
@spaces.GPU
|
35 |
def process(selected_template_images, custom_image, prompt,
|
36 |
negative_prompt, prompt_selected, model_selected_tab,
|
37 |
-
prompt_selected_tab, width, height, merge_steps, seed_set):
|
38 |
|
39 |
# The gradio UI only supports one image at a time.
|
40 |
if model_selected_tab==0:
|
@@ -80,6 +81,7 @@ def process(selected_template_images, custom_image, prompt,
|
|
80 |
negative_prompt=negative_prompt,
|
81 |
num_images_per_prompt=1,
|
82 |
num_inference_steps=num_steps,
|
|
|
83 |
start_merge_step=merge_steps,
|
84 |
generator=generator,
|
85 |
).images[0]
|
@@ -135,9 +137,17 @@ with gr.Blocks(title="ConsistentID Demo") as demo:
|
|
135 |
prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
|
136 |
for i, tab in enumerate(prompt_selected_tabs):
|
137 |
tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[prompt_selected_tab])
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
width = gr.Slider(label="image width",minimum=256,maximum=768,value=512,step=8)
|
140 |
-
height = gr.Slider(label="image height",minimum=256,maximum=768,value=
|
141 |
width.release(lambda x,y: min(1280-x,y), inputs=[width,height], outputs=[height])
|
142 |
height.release(lambda x,y: min(1280-y,x), inputs=[width,height], outputs=[width])
|
143 |
merge_steps = gr.Slider(label="step starting to merge facial details(30 is recommended)",minimum=10,maximum=50,value=30,step=1)
|
@@ -153,6 +163,6 @@ with gr.Blocks(title="ConsistentID Demo") as demo:
|
|
153 |
- Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
|
154 |
''')
|
155 |
btn.click(fn=process, inputs=[selected_template_images, custom_image,prompt, nagetive_prompt, prompt_selected,
|
156 |
-
model_selected_tab, prompt_selected_tab, width, height, merge_steps, seed_set], outputs=out)
|
157 |
|
158 |
demo.launch(server_name='0.0.0.0', ssl_verify=False)
|
|
|
22 |
pipe = ConsistentIDPipeline.from_pretrained(
|
23 |
args.base_model_path,
|
24 |
torch_dtype=torch.float16,
|
25 |
+
)
|
26 |
|
27 |
### Load consistentID_model checkpoint
|
28 |
pipe.load_ConsistentID_model(
|
|
|
30 |
bise_net_weight_path="./models/BiSeNet_pretrained_for_ConsistentID.pth",
|
31 |
)
|
32 |
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
33 |
+
pipe = pipe.to(device, torch.float16)
|
34 |
|
35 |
@spaces.GPU
|
36 |
def process(selected_template_images, custom_image, prompt,
|
37 |
negative_prompt, prompt_selected, model_selected_tab,
|
38 |
+
prompt_selected_tab, guidance_scale, width, height, merge_steps, seed_set):
|
39 |
|
40 |
# The gradio UI only supports one image at a time.
|
41 |
if model_selected_tab==0:
|
|
|
81 |
negative_prompt=negative_prompt,
|
82 |
num_images_per_prompt=1,
|
83 |
num_inference_steps=num_steps,
|
84 |
+
guidance_scale=guidance_scale,
|
85 |
start_merge_step=merge_steps,
|
86 |
generator=generator,
|
87 |
).images[0]
|
|
|
137 |
prompt_selected_tabs = [template_prompts_tab, custom_prompt_tab]
|
138 |
for i, tab in enumerate(prompt_selected_tabs):
|
139 |
tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[prompt_selected_tab])
|
140 |
+
|
141 |
+
guidance_scale = gr.Slider(
|
142 |
+
label="Guidance scale",
|
143 |
+
minimum=1.0,
|
144 |
+
maximum=10.0,
|
145 |
+
step=1.0,
|
146 |
+
value=5.0,
|
147 |
+
)
|
148 |
+
|
149 |
width = gr.Slider(label="image width",minimum=256,maximum=768,value=512,step=8)
|
150 |
+
height = gr.Slider(label="image height",minimum=256,maximum=768,value=512,step=8)
|
151 |
width.release(lambda x,y: min(1280-x,y), inputs=[width,height], outputs=[height])
|
152 |
height.release(lambda x,y: min(1280-y,x), inputs=[width,height], outputs=[width])
|
153 |
merge_steps = gr.Slider(label="step starting to merge facial details(30 is recommended)",minimum=10,maximum=50,value=30,step=1)
|
|
|
163 |
- Due to insufficient graphics memory on the demo server, there is an upper limit on the resolution for generating samples. We will support the generation of SDXL as soon as possible<br/><br/>
|
164 |
''')
|
165 |
btn.click(fn=process, inputs=[selected_template_images, custom_image,prompt, nagetive_prompt, prompt_selected,
|
166 |
+
model_selected_tab, prompt_selected_tab, guidance_scale, width, height, merge_steps, seed_set], outputs=out)
|
167 |
|
168 |
demo.launch(server_name='0.0.0.0', ssl_verify=False)
|
lib/pipline_ConsistentID.py
CHANGED
@@ -412,9 +412,6 @@ class ConsistentIDPipeline(StableDiffusionPipeline):
|
|
412 |
|
413 |
# 6. Get the update text embedding
|
414 |
# parsed_image_parts2: the facial areas of the input image
|
415 |
-
# text_local_id_embeds: [1, 77, 768]
|
416 |
-
# text_local_id_embeds only differs with text_global_id_embeds on 4 tokens, and is identical
|
417 |
-
# to text_global_id_embeds on the rest 73 tokens.
|
418 |
# extract_local_facial_embeds() maps parsed_image_parts2 to multi_facial_embeds, and then replaces the class tokens in prompt_embeds
|
419 |
# with the fused (id_embeds, prompt_embeds[class_tokens_mask]) whose indices are specified by class_tokens_mask.
|
420 |
# parsed_image_parts2: [1, 5, 3, 224, 224]
|
@@ -424,6 +421,8 @@ class ConsistentIDPipeline(StableDiffusionPipeline):
|
|
424 |
calc_uncond=calc_uncond)
|
425 |
|
426 |
# text_global_id_embeds, text_local_global_id_embeds: [1, 81, 768]
|
|
|
|
|
427 |
text_global_id_embeds = torch.cat([text_embeds, global_id_embeds], dim=1)
|
428 |
text_local_global_id_embeds = torch.cat([text_local_id_embeds, global_id_embeds], dim=1)
|
429 |
|
|
|
412 |
|
413 |
# 6. Get the update text embedding
|
414 |
# parsed_image_parts2: the facial areas of the input image
|
|
|
|
|
|
|
415 |
# extract_local_facial_embeds() maps parsed_image_parts2 to multi_facial_embeds, and then replaces the class tokens in prompt_embeds
|
416 |
# with the fused (id_embeds, prompt_embeds[class_tokens_mask]) whose indices are specified by class_tokens_mask.
|
417 |
# parsed_image_parts2: [1, 5, 3, 224, 224]
|
|
|
421 |
calc_uncond=calc_uncond)
|
422 |
|
423 |
# text_global_id_embeds, text_local_global_id_embeds: [1, 81, 768]
|
424 |
+
# text_local_id_embeds: [1, 77, 768], only differs with text_embeds on 4 ID embeddings, and is identical
|
425 |
+
# to text_embeds on the rest 73 tokens.
|
426 |
text_global_id_embeds = torch.cat([text_embeds, global_id_embeds], dim=1)
|
427 |
text_local_global_id_embeds = torch.cat([text_local_id_embeds, global_id_embeds], dim=1)
|
428 |
|