adding custom auto encoder
- app.py +25 -4
- appConfig.json +6 -0
- config.py +5 -0
app.py
CHANGED
@@ -135,6 +135,19 @@ def requires_safety_checker_change(requires_safety_checker, config):
 
     return config, str(config), assemble_code(config)
 
+def auto_encoders_change(auto_encoder, config):
+
+    if str(auto_encoder) != 'None' and type(auto_encoder) != list:
+
+        auto_encoder_description = auto_encoders[auto_encoder]
+
+    else:
+        auto_encoder_description = ''
+
+    config = set_config(config, 'auto_encoder', auto_encoder)
+
+    return auto_encoder_description, config, str(config), assemble_code(config)
+
 def schedulers_change(scheduler, config):
 
     if str(scheduler) != 'None' and type(scheduler) != list:
@@ -222,6 +235,7 @@ appConfig = load_app_config()
 models = appConfig.get("models", {})
 schedulers = appConfig.get("schedulers", {})
 devices = appConfig.get("devices", [])
+auto_encoders = appConfig.get("auto_encoders", {})
 
 # interface
 with gr.Blocks(analytics_enabled=False) as demo:
@@ -246,7 +260,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     out_model_description = gr.Textbox(value="", label="Description")
     with gr.Row():
         with gr.Column(scale=1):
-            in_trigger_token = gr.Textbox(value=config.value["trigger_token"], label="Trigger Token")
+            in_trigger_token = gr.Textbox(value=config.value["trigger_token"], label="Trigger Token", info="will be added to your prompt to `activate` a fine-tuned model")
             in_use_safetensors = gr.Radio(label="Use safe tensors:", choices=["True", "False"], interactive=False)
             in_model_refiner = gr.Dropdown(value=config.value["refiner"], choices=["none"], label="Refiner", allow_custom_value=True, multiselect=False)
         with gr.Column(scale=1):
@@ -271,18 +285,23 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_inference_steps = gr.Number(label="Inference steps", value=config.value["inference_steps"], info="Each step improves the final result but also increases computation time")
     in_manual_seed = gr.Number(label="Manual seed", value=config.value["manual_seed"], info="Set this to -1 or leave it empty to randomly generate an image. A fixed value will reproduce the same image on every run")
     in_guidance_scale = gr.Slider(minimum=0, maximum=100, step=0.1, label="Guidance Scale", value=config.value["guidance_scale"], info="A low guidance scale leads to a faster inference time, with the drawback that negative prompts don’t have any effect on the denoising process.")
+    with gr.Row():
+        gr.Markdown("### Auto Encoder")
     with gr.Row():
         gr.Markdown("**VAE** stands for Variational Autoencoder. An 'autoencoder' is an artificial neural network that encodes input data and decodes it again to essentially recreate the input. A VAE adds additional layers of complexity to create new and unique output.")
     with gr.Row():
-        in_enable_vae_slicing = gr.Radio(label="Enable VAE slicing:", value=config.value["enable_vae_slicing"], choices=["True", "False"], info="decodes batches of latents one image at a time, which may reduce memory usage; see https://huggingface.co/docs/diffusers/main/en/optimization/memory")
-        in_enable_vae_tiling = gr.Radio(label="Enable VAE tiling:", value=config.value["enable_vae_tiling"], choices=["True", "False"], info="splits the image into overlapping tiles, decodes each tile, and blends the outputs to compose the final image; see https://huggingface.co/docs/diffusers/main/en/optimization/memory")
+        with gr.Column():
+            in_auto_encoders = gr.Dropdown(value="None", choices=list(auto_encoders.keys()), label="Auto encoder", info="select `None` to skip adding a custom auto encoder")
+            out_auto_encoder_description = gr.Textbox(value="", label="Description")
+        with gr.Column():
+            in_enable_vae_slicing = gr.Radio(label="Enable VAE slicing:", value=config.value["enable_vae_slicing"], choices=["True", "False"], info="decodes batches of latents one image at a time, which may reduce memory usage; see https://huggingface.co/docs/diffusers/main/en/optimization/memory")
+            in_enable_vae_tiling = gr.Radio(label="Enable VAE tiling:", value=config.value["enable_vae_tiling"], choices=["True", "False"], info="splits the image into overlapping tiles, decodes each tile, and blends the outputs to compose the final image; see https://huggingface.co/docs/diffusers/main/en/optimization/memory")
 
     gr.Markdown("### Output")
     with gr.Row():
         btn_start_pipeline = gr.Button(value="Run", variant="primary")
         btn_stop_pipeline = gr.Button(value="Stop", variant="stop")
     with gr.Row():
-        # out_result = gr.Textbox(label="Status", value="")
         out_image = gr.Image()
         out_code = gr.Code(assemble_code(config.value), label="Code")
     with gr.Row():
@@ -303,6 +322,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_inference_steps.change(inference_steps_change, inputs=[in_inference_steps, config], outputs=[config, out_config, out_code])
     in_manual_seed.change(manual_seed_change, inputs=[in_manual_seed, config], outputs=[config, out_config, out_code])
     in_guidance_scale.change(guidance_scale_change, inputs=[in_guidance_scale, config], outputs=[config, out_config, out_code])
+    in_auto_encoders.change(auto_encoders_change, inputs=[in_auto_encoders, config], outputs=[out_auto_encoder_description, config, out_config, out_code])
     in_enable_vae_slicing.change(enable_vae_slicing_change, inputs=[in_enable_vae_slicing, config], outputs=[config, out_config, out_code])
     in_enable_vae_tiling.change(enable_vae_tiling_change, inputs=[in_enable_vae_tiling, config], outputs=[config, out_config, out_code])
     in_prompt.change(prompt_change, inputs=[in_prompt, config], outputs=[config, out_config, out_code])
@@ -325,6 +345,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
     in_variant,
     in_safety_checker,
     in_requires_safety_checker,
+    in_auto_encoders,
     in_enable_vae_slicing,
     in_enable_vae_tiling,
     in_schedulers,
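The new auto_encoders_change handler follows the same pattern as the existing schedulers_change: look up a description for the selected key, store the choice in the config, and refresh the generated code. Below is a minimal, self-contained sketch of just the dropdown-to-description wiring; the auto_encoders dict is inlined here instead of being loaded from appConfig.json, and the config plumbing is omitted.

import gradio as gr

# Hypothetical stand-in for the "auto_encoders" dict loaded from appConfig.json.
auto_encoders = {
    "None": "",
    "stabilityai/sdxl-vae": "fine-tuned autoencoder, see https://huggingface.co/stabilityai/sdxl-vae",
}

def auto_encoders_change(auto_encoder):
    # Same guard as in app.py: ignore "None" and transient list values.
    if str(auto_encoder) != 'None' and not isinstance(auto_encoder, list):
        return auto_encoders[auto_encoder]
    return ''

with gr.Blocks(analytics_enabled=False) as demo:
    in_auto_encoders = gr.Dropdown(value="None", choices=list(auto_encoders.keys()), label="Auto encoder")
    out_auto_encoder_description = gr.Textbox(value="", label="Description")
    in_auto_encoders.change(auto_encoders_change, inputs=[in_auto_encoders], outputs=[out_auto_encoder_description])

demo.launch()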
appConfig.json
CHANGED
@@ -72,6 +72,12 @@
     "backup_devices": [
         "cpu", "cuda", "ipu", "xpu", "mkldnn", "opengl", "opencl", "ideep", "hip", "ve", "fpga", "ort", "xla", "lazy", "vulkan", "mps", "meta", "hpu", "mtia", "privateuseone", "gpu"
     ],
+    "auto_encoders": {
+        "None": "",
+        "stabilityai/sdxl-vae": "fine-tuned autoencoder for Stable Diffusion XL models, see https://huggingface.co/stabilityai/sdxl-vae",
+        "madebyollin/sdxl-vae-fp16-fix": "SDXL autoencoder fixed to run at fp16 precision, see https://huggingface.co/madebyollin/sdxl-vae-fp16-fix",
+        "stabilityai/sd-vae-ft-mse": "works best with CompVis/stable-diffusion-v1-4, see https://huggingface.co/stabilityai/sd-vae-ft-mse"
+    },
     "schedulers": {
         "DDPMScheduler": "Denoising Diffusion Probabilistic Model",
         "DDIMScheduler": "Denoising Diffusion Implicit Models, efficient image generation, might require more tuning",
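For reference, here is a sketch of how app.py presumably consumes this block via load_app_config; the file-reading details are an assumption, only the auto_encoders lookup is taken from the diff. A dict default (rather than a list) keeps list(auto_encoders.keys()) safe when the key is absent.

import json

def load_app_config():
    # Assumed implementation; app.py's actual loader may differ.
    with open("appConfig.json") as f:
        return json.load(f)

appConfig = load_app_config()

# A dict default keeps list(auto_encoders.keys()) from failing
# when "auto_encoders" is missing from appConfig.json.
auto_encoders = appConfig.get("auto_encoders", {})
print(list(auto_encoders.keys()))  # ['None', 'stabilityai/sdxl-vae', ...]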
config.py
CHANGED
@@ -46,6 +46,7 @@ def get_initial_config():
         "refiner": "none",
         "safety_checker": "False",
         "requires_safety_checker": "False",
+        "auto_encoder": None,
         "enable_vae_slicing": "True",
         "enable_vae_tiling": "True",
         "manual_seed": 42,
@@ -88,6 +89,7 @@ def get_config_from_url(initial_config, request: Request):
         return_config['variant'],
         return_config['safety_checker'],
         return_config['requires_safety_checker'],
+        return_config['auto_encoder'],
         return_config['enable_vae_slicing'],
         return_config['enable_vae_tiling'],
         return_config['scheduler'],
@@ -151,6 +153,9 @@ def assemble_code(str_config):
         torch_dtype=data_type,
         variant=variant).to(device)''')
 
+    if str(config["auto_encoder"]).lower() != 'none':
+        code.append(f'pipeline.vae = AutoencoderKL.from_pretrained("{config["auto_encoder"]}", torch_dtype=data_type).to(device)')
+
     if str(config["cpu_offload"]).lower() != 'false': code.append("pipeline.enable_model_cpu_offload()")
     if str(config["enable_vae_slicing"]).lower() != 'false': code.append("pipeline.enable_vae_slicing()")
     if str(config["enable_vae_tiling"]).lower() != 'false': code.append("pipeline.enable_vae_tiling()")
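Putting the pieces together, the snippet assemble_code emits boils down to swapping the pipeline's VAE before inference. A minimal end-to-end sketch, assuming torch and diffusers are installed; the model IDs are illustrative, and any entry from the auto_encoders block works the same way. Note that the assembled script also needs AutoencoderKL imported from diffusers; if assemble_code does not already emit that import, the generated code will raise a NameError.

import torch
from diffusers import AutoencoderKL, DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
data_type = torch.float16 if device == "cuda" else torch.float32

# Load a base pipeline (model ID is illustrative).
pipeline = DiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=data_type,
).to(device)

# The line assemble_code() emits when config["auto_encoder"] is set:
pipeline.vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse",
    torch_dtype=data_type,
).to(device)

image = pipeline("a watercolor painting of a lighthouse").images[0]
image.save("output.png")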