nickyreinert-vml committed on
Commit f7b4ac7 · 1 Parent(s): 3c2eedc

adding "attention_slicing" method

Files changed (3):
  1. README.md +2 -0
  2. app.py +7 -0
  3. config.py +5 -1
README.md CHANGED
@@ -61,6 +61,8 @@ This allows you to select any model hosted on Huggingface. Some models are fine-
 
 The pipeline supports a way to prevent NSFW content from being created. I found that this does not always work properly, so these two options allow you to disable the feature.
 
+**Attention slicing** divides the attention operation into multiple steps instead of one huge step. On machines with less than 64 GB of memory, or for images larger than 512x512 pixels, this may improve performance drastically. On Apple silicon (M1, M2), it is recommended to keep this setting enabled. See https://huggingface.co/docs/diffusers/optimization/mps
+
 ## Scheduler/Solver
 
 This is the part of the process that manipulates the output of the model in every loop/epoch.
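
For context, a minimal sketch of what the new setting does at the diffusers API level, assuming a Stable Diffusion checkpoint ("CompVis/stable-diffusion-v1-4" is only a stand-in model id):

```python
import torch
from diffusers import StableDiffusionPipeline

# Stand-in checkpoint; any diffusers-compatible model id behaves the same way.
pipeline = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
)
pipeline = pipeline.to("mps")  # Apple silicon; use "cuda" or "cpu" elsewhere

# Compute attention in slices rather than in one large step: this lowers peak
# memory, which is what helps on MPS and on machines with less than 64 GB RAM.
pipeline.enable_attention_slicing()

image = pipeline("a photo of an astronaut riding a horse").images[0]
```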
app.py CHANGED
@@ -274,6 +274,7 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
 
     if str(config["model"]) != 'None' and str(config["model"]) != 'null' and str(config["model"]) != '' and str(config["scheduler"]) != 'None':
 
+        print(config["model"])
         progress(1, desc="Initializing pipeline...")
 
         torch.cuda.empty_cache()
@@ -292,6 +293,9 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
         if str(config["cpu_offload"]).lower() != 'false':
             pipeline.enable_model_cpu_offload()
 
+        # ATTENTION SLICING
+        if str(config["attention_slicing"]).lower() == 'true': pipeline.enable_attention_slicing()
+
         # AUTO ENCODER
         if str(config["auto_encoder"]).lower() != 'none' and str(config["auto_encoder"]).lower() != 'null':
             pipeline.vae = AutoencoderKL.from_pretrained(config["auto_encoder"], torch_dtype=get_data_type(config["data_type"])).to(config["device"])
@@ -439,6 +443,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type = gr.Radio(label="Data Type:", value=config.value["data_type"], choices=["bfloat16", "float16", "float32"], info="`bfloat16` is not supported on MPS devices right now; `float16` may also not be supported on all devices. Half-precision weights save GPU memory, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_allow_tensorfloat32 = gr.Radio(label="Allow TensorFloat32:", value=config.value["allow_tensorfloat32"], choices=["True", "False"], info="Not supported on MPS devices right now; TensorFloat-32 is faster, but results in slightly less accurate computations, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_variant = gr.Radio(label="Variant:", value=config.value["variant"], choices=["fp16", None], info="Using half-precision weights saves GPU memory, but not all models support them, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
+        in_attention_slicing = gr.Radio(label="Attention slicing:", value=config.value["attention_slicing"], choices=["True", "False"], info="The attention operation is split into multiple steps, see https://huggingface.co/docs/diffusers/optimization/mps")
 
         gr.Markdown("### Model specific settings")
         with gr.Row():
@@ -535,6 +540,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_data_type.change(data_type_change, inputs=[in_data_type, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('data_type', value, config)")
     in_allow_tensorfloat32.change(tensorfloat32_change, inputs=[in_allow_tensorfloat32, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('allow_tensorfloat32', value, config)")
     in_variant.change(variant_change, inputs=[in_variant, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('variant', value, config)")
+    in_attention_slicing.change(attention_slicing_change, inputs=[in_attention_slicing, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('attention_slicing', value, config)")  # the commit reused variant_change here; a dedicated handler is assumed, see the sketch below
     in_model_refiner.change(model_refiner_change, inputs=[in_model_refiner, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('model_refiner', value, config)")
     in_cpu_offload.change(cpu_offload_change, inputs=[in_cpu_offload, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('cpu_offload', value, config)")
     in_safety_checker.change(safety_checker_change, inputs=[in_safety_checker, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('safety_checker', value, config)")
@@ -567,6 +573,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type,
         in_model_refiner,
         in_variant,
+        in_attention_slicing,
         in_safety_checker,
         in_requires_safety_checker,
         in_auto_encoders,
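
The new radio button needs its own change handler: as committed, it was wired to variant_change, which updates the wrong config key, so the hunk above assumes a dedicated callback instead. A plausible sketch, following the naming pattern of the file's other *_change callbacks; their bodies are not part of this diff, so the return shape is inferred from the outputs=[config, out_config, out_code] wiring and is an assumption:

```python
# Hypothetical handler, mirroring data_type_change, variant_change, etc.
# assemble_code() is the generator shown in config.py below; returning the
# config twice (state + display) is a guess at what out_config expects.
def attention_slicing_change(value, config):
    config["attention_slicing"] = value
    return config, config, assemble_code(config)
```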
config.py CHANGED
@@ -40,6 +40,7 @@ def get_initial_config():
         "cpu_offload": "False",
         "scheduler": None,
         "variant": None,
+        "attention_slicing": "True",
         "allow_tensorfloat32": allow_tensorfloat32,
         "use_safetensors": "False",
         "data_type": data_type,
@@ -102,6 +103,7 @@ def get_config_from_url(initial_config, request: Request):
         return_config['data_type'],
         return_config['refiner'],
         return_config['variant'],
+        return_config['attention_slicing'],
         return_config['safety_checker'],
         return_config['requires_safety_checker'],
         return_config['auto_encoder'],
@@ -162,7 +164,7 @@ def assemble_code(str_config):
         code.append(f'variant = {config["variant"]}')
     else:
         code.append(f'variant = "{config["variant"]}"')
-
+
     code.append(f'''use_safetensors = {config["use_safetensors"]}''')
 
     # INIT PIPELINE
@@ -172,6 +174,8 @@ def assemble_code(str_config):
         torch_dtype=data_type,
         variant=variant).to(device)''')
 
+    if str(config["attention_slicing"]).lower() != 'false': code.append("pipeline.enable_attention_slicing()")
+
     if str(config["cpu_offload"]).lower() != 'false': code.append("pipeline.enable_model_cpu_offload()")
 
     # AUTO ENCODER
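
With attention_slicing left at its default of "True", the snippet that assemble_code() builds gains one line right after the pipeline is constructed. Roughly like this; DiffusionPipeline and the stand-in values are assumptions, since neither the class name nor the earlier assignments are visible in these hunks:

```python
import torch
from diffusers import DiffusionPipeline  # class name is an assumption

model = "CompVis/stable-diffusion-v1-4"  # stand-in values for the assignments
data_type = torch.float16                # that assemble_code() emits earlier
variant = "fp16"                         # in the generated snippet
device = "mps"

pipeline = DiffusionPipeline.from_pretrained(
    model,
    torch_dtype=data_type,
    variant=variant).to(device)

pipeline.enable_attention_slicing()
```

Note that the two code paths test the flag differently: the live run in app.py enables slicing only when the value is exactly 'true', while the generated snippet enables it whenever the value is anything other than 'false', so an empty or unexpected value behaves differently live versus in the exported code.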