nickyreinert-vml committed
Commit · f7b4ac7 · 1 Parent(s): 3c2eedc
adding "attention_slicing" method
README.md
CHANGED
@@ -61,6 +61,8 @@ This allows you to select any model hosted on Huggingface. Some models are fine-
 
 The pipeline supports a way to prevent NSFW content from being created. I found that this does not always work properly, so these two options allow you to disable this feature.
 
+**Attention slicing** divides the attention operation into multiple steps instead of one huge step. On machines with less than 64 GByte of memory, or for images larger than 512x512 pixels, this may improve performance drastically. On Apple Silicon (M1, M2), it is recommended to keep this setting enabled. See https://huggingface.co/docs/diffusers/optimization/mps
+
 ## Scheduler/Solver
 
 This is the part of the process that manipulates the output from the model in every loop/epoch.
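
The README paragraph above maps to a single call on a diffusers pipeline. A minimal sketch, assuming a standard text-to-image checkpoint (the model id, prompt, and device are illustrative, not taken from this repo's config):

```python
# Minimal sketch: what "attention slicing" means in diffusers terms.
import torch
from diffusers import StableDiffusionPipeline

pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("mps")  # Apple Silicon; "cuda" or "cpu" work the same way

# Compute the attention matrix in slices instead of all at once:
# lower peak memory, which on MPS machines below 64 GB of RAM
# typically also means better performance.
pipeline.enable_attention_slicing()

image = pipeline("a watercolor fox in a forest").images[0]
```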
app.py
CHANGED
@@ -274,6 +274,7 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
 
     if str(config["model"]) != 'None' and str(config["model"]) != 'null' and str(config["model"]) != '' and str(config["scheduler"]) != 'None':
 
+        print(config["model"])
         progress(1, desc="Initializing pipeline...")
 
         torch.cuda.empty_cache()
@@ -292,6 +293,9 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
         if str(config["cpu_offload"]).lower() != 'false':
             pipeline.enable_model_cpu_offload()
 
+        # ATTENTION SLICING
+        if str(config["attention_slicing"]).lower() == 'true': pipeline.enable_attention_slicing()
+
         # AUTO ENCODER
         if str(config["auto_encoder"]).lower() != 'none' and str(config["auto_encoder"]).lower() != 'null':
             pipeline.vae = AutoencoderKL.from_pretrained(config["auto_encoder"], torch_dtype=get_data_type(config["data_type"])).to(config["device"])
@@ -439,6 +443,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type = gr.Radio(label="Data Type:", value=config.value["data_type"], choices=["bfloat16", "float16", "float32"], info="`bfloat16` is not supported on MPS devices right now; `float16` may also not be supported on all devices. Half-precision weights save GPU memory, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_allow_tensorfloat32 = gr.Radio(label="Allow TensorFloat32:", value=config.value["allow_tensorfloat32"], choices=["True", "False"], info="Not supported on MPS devices right now; TensorFloat-32 is faster but results in slightly less accurate computations, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_variant = gr.Radio(label="Variant:", value=config.value["variant"], choices=["fp16", None], info="Half-precision weights save GPU memory, but not all models support them, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
+        in_attention_slicing = gr.Radio(label="Attention slicing:", value=config.value["attention_slicing"], choices=["True", "False"], info="The attention operation will be cut into multiple steps, see https://huggingface.co/docs/diffusers/optimization/mps")
 
     gr.Markdown("### Model specific settings")
     with gr.Row():
@@ -535,6 +540,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_data_type.change(data_type_change, inputs=[in_data_type, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('data_type', value, config)")
     in_allow_tensorfloat32.change(tensorfloat32_change, inputs=[in_allow_tensorfloat32, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('allow_tensorfloat32', value, config)")
     in_variant.change(variant_change, inputs=[in_variant, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('variant', value, config)")
+    in_attention_slicing.change(variant_change, inputs=[in_attention_slicing, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('attention_slicing', value, config)")
     in_model_refiner.change(model_refiner_change, inputs=[in_model_refiner, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('model_refiner', value, config)")
     in_cpu_offload.change(cpu_offload_change, inputs=[in_cpu_offload, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('cpu_offload', value, config)")
     in_safety_checker.change(safety_checker_change, inputs=[in_safety_checker, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('safety_checker', value, config)")
@@ -567,6 +573,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type,
         in_model_refiner,
         in_variant,
+        in_attention_slicing,
         in_safety_checker,
         in_requires_safety_checker,
         in_auto_encoders,
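
Two details in the app.py hunks are worth noting. The first hunk leaves a debug `print(config["model"])` in place. More importantly, the new radio is wired to `variant_change` rather than to a handler of its own; only the cookie key differs. If the intent was to follow the pattern of the other `*_change` callbacks, a dedicated handler would presumably look like the sketch below. The callback bodies are not part of this diff, so the function name and return shape (updated state, rendered config, assembled code) are assumptions:

```python
# Hypothetical handler, modeled on the existing *_change callbacks.
# Their bodies are not shown in this diff, so the exact return shape
# is an assumption based on the declared outputs.
def attention_slicing_change(value, config):
    config["attention_slicing"] = value
    return config, config, assemble_code(config)

in_attention_slicing.change(attention_slicing_change, inputs=[in_attention_slicing, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('attention_slicing', value, config)")
```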
config.py
CHANGED
@@ -40,6 +40,7 @@ def get_initial_config():
         "cpu_offload": "False",
         "scheduler": None,
         "variant": None,
+        "attention_slicing": "True",
         "allow_tensorfloat32": allow_tensorfloat32,
         "use_safetensors": "False",
         "data_type": data_type,
@@ -102,6 +103,7 @@ def get_config_from_url(initial_config, request: Request):
         return_config['data_type'],
         return_config['refiner'],
         return_config['variant'],
+        return_config['attention_slicing'],
         return_config['safety_checker'],
         return_config['requires_safety_checker'],
         return_config['auto_encoder'],
@@ -162,7 +164,7 @@ def assemble_code(str_config):
         code.append(f'variant = {config["variant"]}')
     else:
         code.append(f'variant = "{config["variant"]}"')
-
+
     code.append(f'''use_safetensors = {config["use_safetensors"]}''')
 
     # INIT PIPELINE
@@ -172,6 +174,8 @@ def assemble_code(str_config):
         torch_dtype=data_type,
         variant=variant).to(device)''')
 
+    if str(config["attention_slicing"]).lower() != 'false': code.append("pipeline.enable_attention_slicing()")
+
    if str(config["cpu_offload"]).lower() != 'false': code.append("pipeline.enable_model_cpu_offload()")
 
     # AUTO ENCODER
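
With the new default `"attention_slicing": "True"`, the code that `assemble_code` emits now contains the slicing call right before the optional CPU-offload line. Note that the two checks are not symmetrical: app.py enables slicing only when the value is exactly `'true'`, while the assembled code enables it whenever the value is not `'false'`; with the shipped `"True"`/`"False"` choices, both conditions agree. An excerpt of the assembled output:

```python
# Excerpt of the generated code once "attention_slicing" is "True"
# (pipeline setup and the remaining assembled lines omitted).
pipeline.enable_attention_slicing()
pipeline.enable_model_cpu_offload()  # emitted only if cpu_offload is enabled
```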