Commit
•
69792a3
1
Parent(s):
2ca8ba0
update README (#1)
Browse files- update README (ac8f9e356081e6d322fadc4eb23908edba9a1e5b)
Co-authored-by: Will Berman <[email protected]>
- README.md +18 -307
- images/bag.png +0 -0
- images/bag_scribble.png +0 -0
- images/bag_scribble_out.png +0 -0
- images/bird.png +0 -3
- images/bird_canny.png +0 -0
- images/bird_canny_out.png +0 -0
- images/chef_pose_out.png +0 -0
- images/house_seg_out.png +0 -0
- images/man.png +0 -0
- images/man_hed.png +0 -0
- images/man_hed_out.png +0 -0
- images/openpose.png +0 -0
- images/pose.png +0 -0
- images/room.png +0 -0
- images/room_mlsd.png +0 -0
- images/room_mlsd_out.png +0 -0
- images/stormtrooper.png +0 -0
- images/stormtrooper_depth.png +0 -0
- images/stormtrooper_depth_out.png +0 -0
- images/toy.png +0 -0
- images/toy_normal.png +0 -0
- images/toy_normal_out.png +0 -0
README.md
CHANGED
@@ -18,136 +18,12 @@ Controlnet's auxiliary models are trained with stable diffusion 1.5. Experimenta
|
|
18 |
The auxiliary conditioning is passed directly to the diffusers pipeline. If you want to process an image to create the auxiliary conditioning, external dependencies are required.
|
19 |
|
20 |
Some of the additional conditionings can be extracted from images via additional models. We extracted these
|
21 |
-
additional models from the original controlnet repo into a separate package that can be found on [github](https://github.com/patrickvonplaten/human_pose.git).
|
22 |
-
|
23 |
-
## Canny edge detection
|
24 |
-
|
25 |
-
Install opencv
|
26 |
-
|
27 |
-
```sh
|
28 |
-
$ pip install opencv-contrib-python
|
29 |
-
```
|
30 |
-
|
31 |
-
```python
|
32 |
-
import cv2
|
33 |
-
from PIL import Image
|
34 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
35 |
-
import torch
|
36 |
-
import numpy as np
|
37 |
-
|
38 |
-
image = Image.open('images/bird.png')
|
39 |
-
image = np.array(image)
|
40 |
-
|
41 |
-
low_threshold = 100
|
42 |
-
high_threshold = 200
|
43 |
-
|
44 |
-
image = cv2.Canny(image, low_threshold, high_threshold)
|
45 |
-
image = image[:, :, None]
|
46 |
-
image = np.concatenate([image, image, image], axis=2)
|
47 |
-
image = Image.fromarray(image)
|
48 |
-
|
49 |
-
controlnet = ControlNetModel.from_pretrained(
|
50 |
-
"fusing/stable-diffusion-v1-5-controlnet-canny",
|
51 |
-
)
|
52 |
-
|
53 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
54 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
55 |
-
)
|
56 |
-
pipe.to('cuda')
|
57 |
-
|
58 |
-
image = pipe("bird", image).images[0]
|
59 |
-
|
60 |
-
image.save('images/bird_canny_out.png')
|
61 |
-
```
|
62 |
-
|
63 |
-
![bird](./images/bird.png)
|
64 |
-
|
65 |
-
![bird_canny](./images/bird_canny.png)
|
66 |
-
|
67 |
-
![bird_canny_out](./images/bird_canny_out.png)
|
68 |
-
|
69 |
-
## M-LSD Straight line detection
|
70 |
-
|
71 |
-
Install the additional controlnet models package.
|
72 |
-
|
73 |
-
```sh
|
74 |
-
$ pip install git+https://github.com/patrickvonplaten/human_pose.git
|
75 |
-
```
|
76 |
-
|
77 |
-
```py
|
78 |
-
from PIL import Image
|
79 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
80 |
-
import torch
|
81 |
-
from human_pose import MLSDdetector
|
82 |
-
|
83 |
-
mlsd = MLSDdetector.from_pretrained('lllyasviel/ControlNet')
|
84 |
-
|
85 |
-
image = Image.open('images/room.png')
|
86 |
-
|
87 |
-
image = mlsd(image)
|
88 |
-
|
89 |
-
controlnet = ControlNetModel.from_pretrained(
|
90 |
-
"fusing/stable-diffusion-v1-5-controlnet-mlsd",
|
91 |
-
)
|
92 |
-
|
93 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
94 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
95 |
-
)
|
96 |
-
pipe.to('cuda')
|
97 |
-
|
98 |
-
image = pipe("room", image).images[0]
|
99 |
-
|
100 |
-
image.save('images/room_mlsd_out.png')
|
101 |
-
```
|
102 |
-
|
103 |
-
![room](./images/room.png)
|
104 |
-
|
105 |
-
![room_mlsd](./images/room_mlsd.png)
|
106 |
-
|
107 |
-
![room_mlsd_out](./images/room_mlsd_out.png)
|
108 |
-
|
109 |
-
## Pose estimation
|
110 |
-
|
111 |
-
Install the additional controlnet models package.
|
112 |
-
|
113 |
-
```sh
|
114 |
-
$ pip install git+https://github.com/patrickvonplaten/human_pose.git
|
115 |
-
```
|
116 |
-
|
117 |
-
```py
|
118 |
-
from PIL import Image
|
119 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
120 |
-
import torch
|
121 |
-
from human_pose import OpenposeDetector
|
122 |
-
|
123 |
-
openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
|
124 |
-
|
125 |
-
image = Image.open('images/pose.png')
|
126 |
-
|
127 |
-
image = openpose(image)
|
128 |
-
|
129 |
-
controlnet = ControlNetModel.from_pretrained(
|
130 |
-
"fusing/stable-diffusion-v1-5-controlnet-openpose",
|
131 |
-
)
|
132 |
-
|
133 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
134 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
135 |
-
)
|
136 |
-
pipe.to('cuda')
|
137 |
-
|
138 |
-
image = pipe("chef in the kitchen", image).images[0]
|
139 |
-
|
140 |
-
image.save('images/chef_pose_out.png')
|
141 |
-
```
|
142 |
-
|
143 |
-
![pose](./images/pose.png)
|
144 |
-
|
145 |
-
![openpose](./images/openpose.png)
|
146 |
-
|
147 |
-
![chef_pose_out](./images/chef_pose_out.png)
|
148 |
|
149 |
## Semantic Segmentation
|
150 |
|
|
|
|
|
151 |
Semantic segmentation relies on transformers. Transformers is a
|
152 |
dependency of diffusers for running controlnet, so you should
|
153 |
have it installed already.
|
@@ -158,7 +34,7 @@ from PIL import Image
|
|
158 |
import numpy as np
|
159 |
from controlnet_utils import ade_palette
|
160 |
import torch
|
161 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
162 |
|
163 |
image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
|
164 |
image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
|
@@ -184,15 +60,23 @@ color_seg = color_seg.astype(np.uint8)
|
|
184 |
image = Image.fromarray(color_seg)
|
185 |
|
186 |
controlnet = ControlNetModel.from_pretrained(
|
187 |
-
"fusing/stable-diffusion-v1-5-controlnet-seg",
|
188 |
)
|
189 |
|
190 |
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
191 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
192 |
)
|
193 |
-
pipe.to('cuda')
|
194 |
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
image.save('./images/house_seg_out.png')
|
198 |
```
|
@@ -203,179 +87,6 @@ image.save('./images/house_seg_out.png')
|
|
203 |
|
204 |
![house_seg_out](images/house_seg_out.png)
|
205 |
|
206 |
-
|
207 |
-
|
208 |
-
Depth control relies on transformers. Transformers is a dependency of diffusers for running controlnet, so
|
209 |
-
you should have it installed already.
|
210 |
-
|
211 |
-
```py
|
212 |
-
from transformers import pipeline
|
213 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
214 |
-
from PIL import Image
|
215 |
-
import numpy as np
|
216 |
-
|
217 |
-
depth_estimator = pipeline('depth-estimation')
|
218 |
-
|
219 |
-
image = Image.open('./images/stormtrooper.png')
|
220 |
-
image = depth_estimator(image)['depth']
|
221 |
-
image = np.array(image)
|
222 |
-
image = image[:, :, None]
|
223 |
-
image = np.concatenate([image, image, image], axis=2)
|
224 |
-
image = Image.fromarray(image)
|
225 |
-
|
226 |
-
controlnet = ControlNetModel.from_pretrained(
|
227 |
-
"fusing/stable-diffusion-v1-5-controlnet-depth",
|
228 |
-
)
|
229 |
-
|
230 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
231 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
232 |
-
)
|
233 |
-
pipe.to('cuda')
|
234 |
-
|
235 |
-
image = pipe("Stormtrooper's lecture", image).images[0]
|
236 |
-
|
237 |
-
image.save('./images/stormtrooper_depth_out.png')
|
238 |
-
```
|
239 |
-
|
240 |
-
![stormtrooper](./images/stormtrooper.png)
|
241 |
-
|
242 |
-
![stormtrooper_depth](./images/stormtrooper_depth.png)
|
243 |
-
|
244 |
-
![stormtrooper_depth_out](./images/stormtrooper_depth_out.png)
|
245 |
-
|
246 |
-
|
247 |
-
## Normal map
|
248 |
-
|
249 |
-
```py
|
250 |
-
from PIL import Image
|
251 |
-
from transformers import pipeline
|
252 |
-
import numpy as np
|
253 |
-
import cv2
|
254 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
255 |
-
|
256 |
-
image = Image.open("images/toy.png").convert("RGB")
|
257 |
-
|
258 |
-
depth_estimator = pipeline("depth-estimation", model ="Intel/dpt-hybrid-midas" )
|
259 |
-
|
260 |
-
image = depth_estimator(image)['predicted_depth'][0]
|
261 |
-
|
262 |
-
image = image.numpy()
|
263 |
-
|
264 |
-
image_depth = image.copy()
|
265 |
-
image_depth -= np.min(image_depth)
|
266 |
-
image_depth /= np.max(image_depth)
|
267 |
-
|
268 |
-
bg_threhold = 0.4
|
269 |
-
|
270 |
-
x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
|
271 |
-
x[image_depth < bg_threhold] = 0
|
272 |
-
|
273 |
-
y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
|
274 |
-
y[image_depth < bg_threhold] = 0
|
275 |
-
|
276 |
-
z = np.ones_like(x) * np.pi * 2.0
|
277 |
-
|
278 |
-
image = np.stack([x, y, z], axis=2)
|
279 |
-
image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
|
280 |
-
image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
|
281 |
-
image = Image.fromarray(image)
|
282 |
-
|
283 |
-
controlnet = ControlNetModel.from_pretrained(
|
284 |
-
"fusing/stable-diffusion-v1-5-controlnet-normal",
|
285 |
-
)
|
286 |
-
|
287 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
288 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
289 |
-
)
|
290 |
-
pipe.to('cuda')
|
291 |
-
|
292 |
-
image = pipe("cute toy", image).images[0]
|
293 |
-
|
294 |
-
image.save('images/toy_normal_out.png')
|
295 |
-
```
|
296 |
-
|
297 |
-
![toy](./images/toy.png)
|
298 |
-
|
299 |
-
![toy_normal](./images/toy_normal.png)
|
300 |
-
|
301 |
-
![toy_normal_out](./images/toy_normal_out.png)
|
302 |
-
|
303 |
-
## Scribble
|
304 |
-
|
305 |
-
Install the additional controlnet models package.
|
306 |
-
|
307 |
-
```sh
|
308 |
-
$ pip install git+https://github.com/patrickvonplaten/human_pose.git
|
309 |
-
```
|
310 |
-
|
311 |
-
```py
|
312 |
-
from PIL import Image
|
313 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
314 |
-
import torch
|
315 |
-
from human_pose import HEDdetector
|
316 |
-
|
317 |
-
hed = HEDdetector.from_pretrained('lllyasviel/ControlNet')
|
318 |
-
|
319 |
-
image = Image.open('images/bag.png')
|
320 |
-
|
321 |
-
image = hed(image, scribble=True)
|
322 |
-
|
323 |
-
controlnet = ControlNetModel.from_pretrained(
|
324 |
-
"fusing/stable-diffusion-v1-5-controlnet-scribble",
|
325 |
-
)
|
326 |
-
|
327 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
328 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
329 |
-
)
|
330 |
-
pipe.to('cuda')
|
331 |
-
|
332 |
-
image = pipe("bag", image).images[0]
|
333 |
-
|
334 |
-
image.save('images/bag_scribble_out.png')
|
335 |
-
```
|
336 |
-
|
337 |
-
![bag](./images/bag.png)
|
338 |
-
|
339 |
-
![bag_scribble](./images/bag_scribble.png)
|
340 |
-
|
341 |
-
![bag_scribble_out](./images/bag_scribble_out.png)
|
342 |
-
|
343 |
-
## HED Boundary
|
344 |
-
|
345 |
-
Install the additional controlnet models package.
|
346 |
-
|
347 |
-
```sh
|
348 |
-
$ pip install git+https://github.com/patrickvonplaten/human_pose.git
|
349 |
-
```
|
350 |
-
|
351 |
-
```py
|
352 |
-
from PIL import Image
|
353 |
-
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
354 |
-
import torch
|
355 |
-
from human_pose import HEDdetector
|
356 |
-
|
357 |
-
hed = HEDdetector.from_pretrained('lllyasviel/ControlNet')
|
358 |
-
|
359 |
-
image = Image.open('images/man.png')
|
360 |
-
|
361 |
-
image = hed(image)
|
362 |
-
|
363 |
-
controlnet = ControlNetModel.from_pretrained(
|
364 |
-
"fusing/stable-diffusion-v1-5-controlnet-hed",
|
365 |
-
)
|
366 |
-
|
367 |
-
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
368 |
-
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
|
369 |
-
)
|
370 |
-
pipe.to('cuda')
|
371 |
-
|
372 |
-
image = pipe("oil painting of handsome old man, masterpiece", image).images[0]
|
373 |
-
|
374 |
-
image.save('images/man_hed_out.png')
|
375 |
-
```
|
376 |
-
|
377 |
-
![man](./images/man.png)
|
378 |
-
|
379 |
-
![man_hed](./images/man_hed.png)
|
380 |
|
381 |
-
|
|
|
18 |
The auxiliary conditioning is passed directly to the diffusers pipeline. If you want to process an image to create the auxiliary conditioning, external dependencies are required.
|
19 |
|
20 |
Some of the additional conditionings can be extracted from images via additional models. We extracted these
|
21 |
+
additional models from the original controlnet repo into a separate package that can be found on [github](https://github.com/patrickvonplaten/controlnet_aux.git).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
## Semantic Segmentation
|
24 |
|
25 |
+
### Diffusers
|
26 |
+
|
27 |
Semantic segmentation relies on transformers. Transformers is a
|
28 |
dependency of diffusers for running controlnet, so you should
|
29 |
have it installed already.
|
|
|
34 |
import numpy as np
|
35 |
from controlnet_utils import ade_palette
|
36 |
import torch
|
37 |
+
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
|
38 |
|
39 |
image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
|
40 |
image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
|
|
|
60 |
image = Image.fromarray(color_seg)
|
61 |
|
62 |
controlnet = ControlNetModel.from_pretrained(
|
63 |
+
"fusing/stable-diffusion-v1-5-controlnet-seg", torch_dtype=torch.float16
|
64 |
)
|
65 |
|
66 |
pipe = StableDiffusionControlNetPipeline.from_pretrained(
|
67 |
+
"runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
|
68 |
)
|
|
|
69 |
|
70 |
+
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
|
71 |
+
|
72 |
+
# Remove if you do not have xformers installed
|
73 |
+
# see https://huggingface.co/docs/diffusers/v0.13.0/en/optimization/xformers#installing-xformers
|
74 |
+
# for installation instructions
|
75 |
+
pipe.enable_xformers_memory_efficient_attention()
|
76 |
+
|
77 |
+
pipe.enable_model_cpu_offload()
|
78 |
+
|
79 |
+
image = pipe("house", image, num_inference_steps=20).images[0]
|
80 |
|
81 |
image.save('./images/house_seg_out.png')
|
82 |
```
|
|
|
87 |
|
88 |
![house_seg_out](images/house_seg_out.png)
|
89 |
|
90 |
+
### Training
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
+
The semantic segmentation model was trained on 164K (segmentation image, caption) pairs from ADE20K. It was trained for 200 GPU-hours on Nvidia A100 80GB, with Stable Diffusion 1.5 as the base model.
|
images/bag.png
DELETED
Binary file (462 kB)
|
|
images/bag_scribble.png
DELETED
Binary file (11 kB)
|
|
images/bag_scribble_out.png
DELETED
Binary file (556 kB)
|
|
images/bird.png
DELETED
Git LFS Details
|
images/bird_canny.png
DELETED
Binary file (29.1 kB)
|
|
images/bird_canny_out.png
DELETED
Binary file (845 kB)
|
|
images/chef_pose_out.png
DELETED
Binary file (570 kB)
|
|
images/house_seg_out.png
CHANGED
images/man.png
DELETED
Binary file (773 kB)
|
|
images/man_hed.png
DELETED
Binary file (118 kB)
|
|
images/man_hed_out.png
DELETED
Binary file (737 kB)
|
|
images/openpose.png
DELETED
Binary file (6.55 kB)
|
|
images/pose.png
DELETED
Binary file (592 kB)
|
|
images/room.png
DELETED
Binary file (637 kB)
|
|
images/room_mlsd.png
DELETED
Binary file (9.06 kB)
|
|
images/room_mlsd_out.png
DELETED
Binary file (575 kB)
|
|
images/stormtrooper.png
DELETED
Binary file (218 kB)
|
|
images/stormtrooper_depth.png
DELETED
Binary file (54.1 kB)
|
|
images/stormtrooper_depth_out.png
DELETED
Binary file (343 kB)
|
|
images/toy.png
DELETED
Binary file (312 kB)
|
|
images/toy_normal.png
DELETED
Binary file (90.1 kB)
|
|
images/toy_normal_out.png
DELETED
Binary file (231 kB)
|
|