nickyreinert-vml committed on
Commit f7b4ac7 · 1 Parent(s): 3c2eedc

adding "attention_slicing" method

Files changed (3):
  1. README.md +2 -0
  2. app.py +7 -0
  3. config.py +5 -1
README.md CHANGED
@@ -61,6 +61,8 @@ This allows you to select any model hosted on Huggingface. Some models are fine-
 
 The pipeline supports a way to prevent NSFW content from being created. I found that this does not always work properly, so these two options allow you to disable the feature.
 
+**Attention slicing** divides the attention operation into multiple steps instead of one huge step. On machines with less than 64 GB of memory, or for images larger than 512x512 pixels, this may improve performance drastically. On Apple silicon (M1, M2), it is recommended to keep this setting enabled. See https://huggingface.co/docs/diffusers/optimization/mps
+
 ## Scheduler/Solver
 
 This is the part of the process that manipulates the output of the model in every loop/epoch.
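
For context, a minimal sketch of what the new setting does at the diffusers API level, assuming a Stable Diffusion checkpoint ("CompVis/stable-diffusion-v1-4" is only a stand-in model id):

```python
import torch
from diffusers import StableDiffusionPipeline

# Stand-in checkpoint; any diffusers-compatible model id behaves the same way.
pipeline = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16
)
pipeline = pipeline.to("mps")  # Apple silicon; use "cuda" or "cpu" elsewhere

# Compute attention in slices rather than in one large step: this lowers peak
# memory, which is what helps on MPS and on machines with less than 64 GB RAM.
pipeline.enable_attention_slicing()

image = pipeline("a photo of an astronaut riding a horse").images[0]
```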
app.py CHANGED
@@ -274,6 +274,7 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
 
     if str(config["model"]) != 'None' and str(config["model"]) != 'null' and str(config["model"]) != '' and str(config["scheduler"]) != 'None':
 
+        print(config["model"])
         progress(1, desc="Initializing pipeline...")
 
         torch.cuda.empty_cache()
@@ -292,6 +293,9 @@ def run_inference(config, config_history, pipeline, progress=gr.Progress(track_t
         if str(config["cpu_offload"]).lower() != 'false':
             pipeline.enable_model_cpu_offload()
 
+        # ATTENTION SLICING
+        if str(config["attention_slicing"]).lower() == 'true': pipeline.enable_attention_slicing()
+
         # AUTO ENCODER
         if str(config["auto_encoder"]).lower() != 'none' and str(config["auto_encoder"]).lower() != 'null':
             pipeline.vae = AutoencoderKL.from_pretrained(config["auto_encoder"], torch_dtype=get_data_type(config["data_type"])).to(config["device"])
@@ -439,6 +443,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type = gr.Radio(label="Data Type:", value=config.value["data_type"], choices=["bfloat16", "float16", "float32"], info="`bfloat16` is not supported on MPS devices right now; `float16` may also not be supported on all devices. Half-precision weights save GPU memory, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_allow_tensorfloat32 = gr.Radio(label="Allow TensorFloat32:", value=config.value["allow_tensorfloat32"], choices=["True", "False"], info="Not supported on MPS devices right now; TensorFloat-32 is faster, but results in slightly less accurate computations, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
         in_variant = gr.Radio(label="Variant:", value=config.value["variant"], choices=["fp16", None], info="Using half-precision weights saves GPU memory, but not all models support them, see https://huggingface.co/docs/diffusers/main/en/optimization/fp16")
+        in_attention_slicing = gr.Radio(label="Attention slicing:", value=config.value["attention_slicing"], choices=["True", "False"], info="The attention operation is split into multiple steps, see https://huggingface.co/docs/diffusers/optimization/mps")
 
         gr.Markdown("### Model specific settings")
         with gr.Row():
@@ -535,6 +540,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_data_type.change(data_type_change, inputs=[in_data_type, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('data_type', value, config)")
     in_allow_tensorfloat32.change(tensorfloat32_change, inputs=[in_allow_tensorfloat32, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('allow_tensorfloat32', value, config)")
     in_variant.change(variant_change, inputs=[in_variant, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('variant', value, config)")
+    in_attention_slicing.change(attention_slicing_change, inputs=[in_attention_slicing, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('attention_slicing', value, config)")  # the commit reused variant_change here; a dedicated handler is assumed, see the sketch below
     in_model_refiner.change(model_refiner_change, inputs=[in_model_refiner, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('model_refiner', value, config)")
     in_cpu_offload.change(cpu_offload_change, inputs=[in_cpu_offload, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('cpu_offload', value, config)")
     in_safety_checker.change(safety_checker_change, inputs=[in_safety_checker, config], outputs=[config, out_config, out_code], js="(value, config) => set_cookie('safety_checker', value, config)")
@@ -567,6 +573,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         in_data_type,
         in_model_refiner,
         in_variant,
+        in_attention_slicing,
         in_safety_checker,
         in_requires_safety_checker,
         in_auto_encoders,
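
The new radio button needs its own change handler: as committed, it was wired to variant_change, which updates the wrong config key, so the hunk above assumes a dedicated callback instead. A plausible sketch, following the naming pattern of the file's other *_change callbacks; their bodies are not part of this diff, so the return shape is inferred from the outputs=[config, out_config, out_code] wiring and is an assumption:

```python
# Hypothetical handler, mirroring data_type_change, variant_change, etc.
# assemble_code() is the generator shown in config.py below; returning the
# config twice (state + display) is a guess at what out_config expects.
def attention_slicing_change(value, config):
    config["attention_slicing"] = value
    return config, config, assemble_code(config)
```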
config.py CHANGED
@@ -40,6 +40,7 @@ def get_initial_config():
         "cpu_offload": "False",
         "scheduler": None,
         "variant": None,
+        "attention_slicing": "True",
         "allow_tensorfloat32": allow_tensorfloat32,
         "use_safetensors": "False",
         "data_type": data_type,
@@ -102,6 +103,7 @@ def get_config_from_url(initial_config, request: Request):
         return_config['data_type'],
         return_config['refiner'],
         return_config['variant'],
+        return_config['attention_slicing'],
         return_config['safety_checker'],
         return_config['requires_safety_checker'],
         return_config['auto_encoder'],
@@ -162,7 +164,7 @@ def assemble_code(str_config):
         code.append(f'variant = {config["variant"]}')
     else:
         code.append(f'variant = "{config["variant"]}"')
-
+
     code.append(f'''use_safetensors = {config["use_safetensors"]}''')
 
     # INIT PIPELINE
@@ -172,6 +174,8 @@ def assemble_code(str_config):
         torch_dtype=data_type,
         variant=variant).to(device)''')
 
+    if str(config["attention_slicing"]).lower() != 'false': code.append("pipeline.enable_attention_slicing()")
+
     if str(config["cpu_offload"]).lower() != 'false': code.append("pipeline.enable_model_cpu_offload()")
 
     # AUTO ENCODER
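
With attention_slicing left at its default of "True", the snippet that assemble_code() builds gains one line right after the pipeline is constructed. Roughly like this; DiffusionPipeline and the stand-in values are assumptions, since neither the class name nor the earlier assignments are visible in these hunks:

```python
import torch
from diffusers import DiffusionPipeline  # class name is an assumption

model = "CompVis/stable-diffusion-v1-4"  # stand-in values for the assignments
data_type = torch.float16                # that assemble_code() emits earlier
variant = "fp16"                         # in the generated snippet
device = "mps"

pipeline = DiffusionPipeline.from_pretrained(
    model,
    torch_dtype=data_type,
    variant=variant).to(device)

pipeline.enable_attention_slicing()
```

Note that the two code paths test the flag differently: the live run in app.py enables slicing only when the value is exactly 'true', while the generated snippet enables it whenever the value is anything other than 'false', so an empty or unexpected value behaves differently live versus in the exported code.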