svjack committed on
Commit
c16a841
·
verified ·
1 Parent(s): 45e2d99

Create canny_app.py

Browse files
Files changed (1) hide show
  1. canny_app.py +216 -0
canny_app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import torch
3
+ import cv2
4
+ import gradio as gr
5
+ import numpy as np
6
+ from huggingface_hub import snapshot_download
7
+ from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
8
+ from diffusers.utils import load_image
9
+ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
10
+ from kolors.models.modeling_chatglm import ChatGLMModel
11
+ from kolors.models.tokenization_chatglm import ChatGLMTokenizer
12
+ from kolors.models.controlnet import ControlNetModel
13
+ from diffusers import AutoencoderKL
14
+ from kolors.models.unet_2d_condition import UNet2DConditionModel
15
+ from diffusers import EulerDiscreteScheduler
16
+ from PIL import Image
17
+ from annotator.util import resize_image, HWC3
18
+
19
# Device every model component below is moved to.
device = "cuda"

# Fetch the three checkpoint snapshots from the Hugging Face Hub:
# the base model, the IP-Adapter weights and the Canny ControlNet.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_ipa = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus")
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")

# Base model components; each is converted to half precision and moved to
# `device`. The tokenizer and scheduler are loaded without any conversion.
text_encoder = (
    ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16)
    .half()
    .to(device)
)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)

# ControlNet that consumes the Canny edge map.
controlnet_canny = (
    ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None)
    .half()
    .to(device)
)

# Image encoder and preprocessor used for the IP-Adapter reference image.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    f'{ckpt_dir_ipa}/image_encoder',
    ignore_mismatched_sizes=True,
).to(dtype=torch.float16, device=device)
ip_img_size = 336
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)

# Assemble the ControlNet img2img pipeline from the components above.
pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
    vae=vae,
    controlnet=controlnet_canny,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    image_encoder=image_encoder,
    feature_extractor=clip_image_processor,
    force_zeros_for_empty_prompt=False,
)

# Attach the IP-Adapter weights that live at the root of the IPA snapshot.
pipe_canny.load_ip_adapter(
    f'{ckpt_dir_ipa}',
    subfolder="",
    weight_name=["ip_adapter_plus_general.bin"],
)
49
+
50
def process_canny_condition(image, canny_threods=(100, 200)):
    """Turn an image array into a 3-channel Canny edge map.

    Args:
        image: Array handed to ``cv2.Canny`` (the caller in this file passes
            ``np.array(...)`` of the resized input image). It is copied
            first, so the caller's array is never touched.
        canny_threods: Two-element sequence holding the first and second
            threshold passed to ``cv2.Canny``. The default is an immutable
            tuple so it cannot be shared and mutated across calls.

    Returns:
        A ``PIL.Image`` built from the edge map replicated over 3 channels.
    """
    np_image = image.copy()
    np_image = cv2.Canny(np_image, canny_threods[0], canny_threods[1])
    # cv2.Canny yields a single-channel map; add a channel axis and repeat it
    # three times so the result has an (H, W, 3) layout.
    np_image = np_image[:, :, None]
    np_image = np.concatenate([np_image, np_image, np_image], axis=2)
    # NOTE(review): HWC3 comes from annotator.util; presumably it normalises
    # the array to a 3-channel uint8 layout -- confirm against that module.
    np_image = HWC3(np_image)
    return Image.fromarray(np_image)
57
+
58
# Upper bound for seeds: the largest value representable as a signed 32-bit int.
_INT32_INFO = np.iinfo(np.int32)
MAX_SEED = _INT32_INFO.max

# Resolution argument handed to resize_image() for the input image.
MAX_IMAGE_SIZE = 1024
60
+
61
def infer_canny(prompt,
                image=None,
                ipa_img=None,
                negative_prompt="nsfw,脸部阴影,低分辨率,糟糕的解剖结构、糟糕的手,缺失手指、质量最差、低质量、jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                seed=66,
                randomize_seed=False,
                guidance_scale=5.0,
                num_inference_steps=50,
                controlnet_conditioning_scale=0.5,
                control_guidance_end=0.9,
                strength=1.0,
                ip_scale=0.5,
                ):
    """Run the Canny ControlNet + IP-Adapter pipeline for one prompt.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Input image; it is resized, used as the img2img source and
            as the source of the Canny edge map. Required.
        ipa_img: Reference image passed as ``ip_adapter_image``. Required.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the torch generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline (cast to ``int``).
        controlnet_conditioning_scale: Forwarded to the pipeline.
        control_guidance_end: Forwarded to the pipeline.
        strength: Forwarded to the pipeline.
        ip_scale: Scale handed to ``set_ip_adapter_scale``.

    Returns:
        ``([edge_map, generated_image], seed)`` where ``seed`` is the integer
        seed that was actually used.

    Raises:
        gr.Error: If ``image`` or ``ipa_img`` is missing.
    """
    # Fail early with a readable UI message instead of an opaque error from
    # deep inside resize_image() or the pipeline.
    if image is None:
        raise gr.Error("Please provide an input image.")
    if ipa_img is None:
        raise gr.Error("Please provide an IP-Adapter reference image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; manual_seed() needs an integer.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    generator = torch.Generator().manual_seed(seed)
    # NOTE(review): resize_image comes from annotator.util; presumably it
    # scales the image with respect to MAX_IMAGE_SIZE -- confirm there.
    init_image = resize_image(image, MAX_IMAGE_SIZE)
    pipe = pipe_canny.to(device)
    pipe.set_ip_adapter_scale([ip_scale])
    condi_img = process_canny_condition(np.array(init_image))
    image = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        ip_adapter_image=[ipa_img],
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, image], seed
96
+
97
# One example row for the UI: [prompt, input image path, IP-Adapter image path].
canny_examples = [
    [
        "一个红色头发的女孩,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
        "image/woman_2.png",
        "image/2.png",
    ],
]

# Stylesheet handed to gr.Blocks; the selectors match the elem_id values
# assigned to the two columns and the run button.
css = """
#col-left {
    margin: 0 auto;
    max-width: 600px;
}
#col-right {
    margin: 0 auto;
    max-width: 750px;
}
#button {
    color: blue;
}
"""
115
+
116
def load_description(fp):
    """Return the entire contents of the UTF-8 text file at ``fp``."""
    with open(fp, mode='r', encoding='utf-8') as handle:
        return handle.read()
120
+
121
def _unit_slider(label, value):
    """Create a slider over 0.0-1.0 in 0.1 steps with the given label and start value."""
    return gr.Slider(label=label, minimum=0.0, maximum=1.0, step=0.1, value=value)


# NOTE(review): the widget nesting below was reconstructed from a listing
# whose indentation had been flattened -- confirm the layout hierarchy.
with gr.Blocks(css=css) as CannyApp:
    gr.HTML(load_description("assets/title.md"))

    with gr.Row():
        # Left column: prompt, the two input images, settings and run button.
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt",
                    lines=2,
                )
            with gr.Row():
                image = gr.Image(label="Image", type="pil")
                ipa_image = gr.Image(label="IP-Adapter-Image", type="pil")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter a negative prompt",
                    visible=True,
                    value="nsfw,脸部阴影,低分辨率,糟糕的解剖结构、糟糕的手,缺失手指、质量最差、低质量、jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
                )
                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=5.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=30,
                    )
                with gr.Row():
                    controlnet_conditioning_scale = _unit_slider("Controlnet Conditioning Scale", 0.5)
                    control_guidance_end = _unit_slider("Control Guidance End", 0.9)
                with gr.Row():
                    strength = _unit_slider("Strength", 1.0)
                    ip_scale = _unit_slider("IP_Scale", 0.5)
            with gr.Row():
                canny_button = gr.Button("Canny", elem_id="button")

        # Right column: gallery with the outputs plus the seed that was used.
        with gr.Column(elem_id="col-right"):
            result = gr.Gallery(label="Result", show_label=False, columns=2)
            seed_used = gr.Number(label="Seed Used")

    with gr.Row():
        gr.Examples(
            fn=infer_canny,
            examples=canny_examples,
            inputs=[prompt, image, ipa_image],
            outputs=[result, seed_used],
            label="Canny",
        )

    # The input order must match the positional parameter order of infer_canny.
    canny_inputs = [
        prompt,
        image,
        ipa_image,
        negative_prompt,
        seed,
        randomize_seed,
        guidance_scale,
        num_inference_steps,
        controlnet_conditioning_scale,
        control_guidance_end,
        strength,
        ip_scale,
    ]
    canny_button.click(
        fn=infer_canny,
        inputs=canny_inputs,
        outputs=[result, seed_used],
    )

CannyApp.queue().launch(debug=True, share=True)