Spaces:

Nick088
/

stable-diffusion-arena

Running on Zero

App Files Files Community

Nick088 commited on Jun 24, 2024

Commit

9c05349

verified ·

1 Parent(s): 60fba13

sdxl flash & stable cascade, improved advanced settings

Browse files

Files changed (1) hide show

app.py +516 -87

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import torch
-from diffusers import StableDiffusion3Pipeline, StableDiffusionPipeline, StableDiffusionXLPipeline, DPMSolverSinglestepScheduler
 import gradio as gr
 import os
 import random
 import numpy as np
 import spaces
-HF_TOKEN = os.getenv("HF_TOKEN")
 if torch.cuda.is_available():
     device = "cuda"
@@ -19,41 +20,42 @@ else:
 MAX_SEED = np.iinfo(np.int32).max
 # Initialize the pipelines for each sd model
-sd3_medium_pipe = StableDiffusion3Pipeline.from_pretrained(
-    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
-)
 sd3_medium_pipe.enable_model_cpu_offload()
-sd2_1_pipe = StableDiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
-)
 sd2_1_pipe.enable_model_cpu_offload()
-sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-)
 sdxl_pipe.enable_model_cpu_offload()
-sdxl_flash_pipe = StableDiffusionXLPipeline.from_pretrained(
-    "sd-community/sdxl-flash", torch_dtype=torch.float16
-)
 sdxl_flash_pipe.enable_model_cpu_offload()
 # Ensure sampler uses "trailing" timesteps for sdxl flash.
 sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(sdxl_flash_pipe.scheduler.config, timestep_spacing="trailing")
 # Helper function to generate images for a single model
 @spaces.GPU(duration=80)
 def generate_single_image(
     prompt,
     negative_prompt,
     num_inference_steps,
     height,
     width,
-    guidance_scale,
     seed,
     num_images_per_prompt,
     model_choice,
     generator,
 ):
     # Select the correct pipeline based on the model choice
     if model_choice == "sd3 medium":
@@ -64,19 +66,41 @@ def generate_single_image(
         pipe = sdxl_pipe
     elif model_choice == "sdxl flash":
         pipe = sdxl_flash_pipe
     else:
         raise ValueError(f"Invalid model choice: {model_choice}")
-    output = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        num_inference_steps=num_inference_steps,
-        height=height,
-        width=width,
-        guidance_scale=guidance_scale,
-        generator=generator,
-        num_images_per_prompt=num_images_per_prompt,
-    ).images
     return output
@@ -85,14 +109,24 @@ def generate_single_image(
 def generate_arena_images(
     prompt,
     negative_prompt,
-    num_inference_steps,
     height,
     width,
-    guidance_scale,
     seed,
     num_images_per_prompt,
-    model_choice_1,
-    model_choice_2,
     progress=gr.Progress(track_tqdm=True),
 ):
     if seed == 0:
@@ -101,32 +135,40 @@ def generate_arena_images(
     generator = torch.Generator().manual_seed(seed)
     # Generate images for both models
-    images_1 = generate_single_image(
         prompt,
         negative_prompt,
-        num_inference_steps,
         height,
         width,
-        guidance_scale,
         seed,
         num_images_per_prompt,
-        model_choice_1,
         generator,
     )
-    images_2 = generate_single_image(
         prompt,
         negative_prompt,
-        num_inference_steps,
         height,
         width,
-        guidance_scale,
         seed,
         num_images_per_prompt,
-        model_choice_2,
         generator,
     )
-    return images_1, images_2
 # Define the image generation function for the Individual tab
 @spaces.GPU(duration=80)
@@ -134,12 +176,16 @@ def generate_individual_image(
     prompt,
     negative_prompt,
     num_inference_steps,
     height,
     width,
-    guidance_scale,
     seed,
     num_images_per_prompt,
     model_choice,
     progress=gr.Progress(track_tqdm=True),
 ):
     if seed == 0:
@@ -151,23 +197,100 @@ def generate_individual_image(
         prompt,
         negative_prompt,
         num_inference_steps,
         height,
         width,
-        guidance_scale,
         seed,
         num_images_per_prompt,
         model_choice,
         generator,
     )
     return output
 # Create the Gradio interface
-examples = [
-    ["A white car racing fast to the moon."],
-    ["A woman in a red dress singing on top of a building."],
-    ["An astronaut on mars in a futuristic cyborg suit."],
 ]
 css = """
@@ -199,44 +322,137 @@ with gr.Blocks(css=css) as demo:
                         info="Describe the image you want",
                         placeholder="A cat...",
                     )
-                    model_choice_1 = gr.Dropdown(
-                        label="Stable Diffusion Model 1",
-                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash"],
                         value="sd3 medium",
                     )
-                    model_choice_2 = gr.Dropdown(
-                        label="Stable Diffusion Model 2",
-                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash"],
                         value="sdxl",
                     )
                     run_button = gr.Button("Run")
-                result_1 = gr.Gallery(label="Generated Images (Model 1)", elem_id="gallery_1")
-                result_2 = gr.Gallery(label="Generated Images (Model 2)", elem_id="gallery_2")
             with gr.Accordion("Advanced options", open=False):
                 with gr.Row():
-                    negative_prompt = gr.Textbox(
-                        label="Negative Prompt",
-                        info="Describe what you don't want in the image",
-                        value="deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
-                        placeholder="Ugly, bad anatomy...",
-                    )
-                with gr.Row():
-                    num_inference_steps = gr.Slider(
-                        label="Number of Inference Steps",
-                        info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
-                        minimum=1,
-                        maximum=50,
-                        value=25,
-                        step=1,
-                    )
-                    guidance_scale = gr.Slider(
-                        label="Guidance Scale",
-                        info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
-                        minimum=0.0,
-                        maximum=10.0,
-                        value=7.5,
-                        step=0.1,
-                    )
                 with gr.Row():
                     width = gr.Slider(
                         label="Width",
@@ -272,9 +488,114 @@ with gr.Blocks(css=css) as demo:
                         value=2,
                     )
             gr.Examples(
-                examples=examples,
-                inputs=[prompt],
                 outputs=[result_1, result_2],
                 fn=generate_arena_images,
             )
@@ -288,14 +609,24 @@ with gr.Blocks(css=css) as demo:
                 inputs=[
                     prompt,
                     negative_prompt,
-                    num_inference_steps,
-                    width,
                     height,
-                    guidance_scale,
                     seed,
                     num_images_per_prompt,
-                    model_choice_1,
-                    model_choice_2,
                 ],
                 outputs=[result_1, result_2],
             )
@@ -310,7 +641,7 @@ with gr.Blocks(css=css) as demo:
                     )
                     model_choice = gr.Dropdown(
                         label="Stable Diffusion Model",
-                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash"],
                         value="sd3 medium",
                     )
                     run_button = gr.Button("Run")
@@ -331,6 +662,7 @@ with gr.Blocks(css=css) as demo:
                         maximum=50,
                         value=25,
                         step=1,
                     )
                     guidance_scale = gr.Slider(
                         label="Guidance Scale",
@@ -339,6 +671,43 @@ with gr.Blocks(css=css) as demo:
                         maximum=10.0,
                         value=7.5,
                         step=0.1,
                     )
                 with gr.Row():
                     width = gr.Slider(
@@ -375,9 +744,65 @@ with gr.Blocks(css=css) as demo:
                         value=2,
                     )
             gr.Examples(
-                examples=examples,
-                inputs=[prompt],
                 outputs=[result],
                 fn=generate_individual_image,
             )
@@ -392,12 +817,16 @@ with gr.Blocks(css=css) as demo:
                     prompt,
                     negative_prompt,
                     num_inference_steps,
-                    width,
-                    height,
                     guidance_scale,
                     seed,
                     num_images_per_prompt,
                     model_choice,
                 ],
                 outputs=[result],
             )

 import torch
+from diffusers import StableDiffusion3Pipeline, StableDiffusionPipeline, StableDiffusionXLPipeline, DPMSolverSinglestepScheduler, StableCascadePriorPipeline, StableCascadeDecoderPipeline
 import gradio as gr
 import os
 import random
 import numpy as np
+from PIL import Image
 import spaces
+HF_TOKEN = os.getenv("HF_TOKEN") # login with hf token to access sd gated models
 if torch.cuda.is_available():
     device = "cuda"
 MAX_SEED = np.iinfo(np.int32).max
 # Initialize the pipelines for each sd model
+sd3_medium_pipe = StableDiffusion3Pipeline.from_pretrained("stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16)
 sd3_medium_pipe.enable_model_cpu_offload()
+sd2_1_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16)
 sd2_1_pipe.enable_model_cpu_offload()
+sdxl_pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
 sdxl_pipe.enable_model_cpu_offload()
+sdxl_flash_pipe = StableDiffusionXLPipeline.from_pretrained("sd-community/sdxl-flash", torch_dtype=torch.float16)
 sdxl_flash_pipe.enable_model_cpu_offload()
 # Ensure sampler uses "trailing" timesteps for sdxl flash.
 sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(sdxl_flash_pipe.scheduler.config, timestep_spacing="trailing")
+stable_cascade_prior_pipe = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16)
+stable_cascade_decoder_pipe = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.float16)
+stable_cascade_prior_pipe.enable_model_cpu_offload()
+stable_cascade_decoder_pipe.enable_model_cpu_offload()
 # Helper function to generate images for a single model
 @spaces.GPU(duration=80)
 def generate_single_image(
     prompt,
     negative_prompt,
     num_inference_steps,
+    guidance_scale,
     height,
     width,
     seed,
     num_images_per_prompt,
     model_choice,
     generator,
+    prior_num_inference_steps=None,
+    prior_guidance_scale=None,
+    decoder_num_inference_steps=None,
+    decoder_guidance_scale=None,
 ):
     # Select the correct pipeline based on the model choice
     if model_choice == "sd3 medium":
         pipe = sdxl_pipe
     elif model_choice == "sdxl flash":
         pipe = sdxl_flash_pipe
+    elif model_choice == "stable cascade":
+        pipe = stable_cascade_prior_pipe
     else:
         raise ValueError(f"Invalid model choice: {model_choice}")
+    if model_choice == "stable cascade":
+        prior_output = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=prior_num_inference_steps,
+            guidance_scale=prior_guidance_scale,
+            height=height,
+            width=width,
+            generator=generator,
+            num_images_per_prompt=num_images_per_prompt,
+        )
+        output = stable_cascade_decoder_pipe(
+            image_embeddings=prior_output.image_embeddings.to(torch.float16),
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=decoder_num_inference_steps,
+            guidance_scale=decoder_guidance_scale,
+        ).images
+    else:
+        output = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            height=height,
+            width=width,
+            generator=generator,
+            num_images_per_prompt=num_images_per_prompt,
+        ).images
     return output
 def generate_arena_images(
     prompt,
     negative_prompt,
+    num_inference_steps_a,
+    guidance_scale_a,
+    num_inference_steps_b,
+    guidance_scale_b,
     height,
     width,
     seed,
     num_images_per_prompt,
+    model_choice_a,
+    model_choice_b,
+    prior_num_inference_steps_a,
+    prior_guidance_scale_a,
+    decoder_num_inference_steps_a,
+    decoder_guidance_scale_a,
+    prior_num_inference_steps_b,
+    prior_guidance_scale_b,
+    decoder_num_inference_steps_b,
+    decoder_guidance_scale_b,
     progress=gr.Progress(track_tqdm=True),
 ):
     if seed == 0:
     generator = torch.Generator().manual_seed(seed)
     # Generate images for both models
+    images_a = generate_single_image(
         prompt,
         negative_prompt,
+        num_inference_steps_a,
+        guidance_scale_a,
         height,
         width,
         seed,
         num_images_per_prompt,
+        model_choice_a,
         generator,
+        prior_num_inference_steps_a,
+        prior_guidance_scale_a,
+        decoder_num_inference_steps_a,
+        decoder_guidance_scale_a,
     )
+    images_b = generate_single_image(
         prompt,
         negative_prompt,
+        num_inference_steps_b,
+        guidance_scale_b,
         height,
         width,
         seed,
         num_images_per_prompt,
+        model_choice_b,
         generator,
+        prior_num_inference_steps_b,
+        prior_guidance_scale_b,
+        decoder_num_inference_steps_b,
+        decoder_guidance_scale_b,
     )
+    return images_a, images_b
 # Define the image generation function for the Individual tab
 @spaces.GPU(duration=80)
     prompt,
     negative_prompt,
     num_inference_steps,
+    guidance_scale,
     height,
     width,
     seed,
     num_images_per_prompt,
     model_choice,
+    prior_num_inference_steps,
+    prior_guidance_scale,
+    decoder_num_inference_steps,
+    decoder_guidance_scale,
     progress=gr.Progress(track_tqdm=True),
 ):
     if seed == 0:
         prompt,
         negative_prompt,
         num_inference_steps,
+        guidance_scale,
         height,
         width,
         seed,
         num_images_per_prompt,
         model_choice,
         generator,
+        prior_num_inference_steps,
+        prior_guidance_scale,
+        decoder_num_inference_steps,
+        decoder_guidance_scale,
     )
     return output
 # Create the Gradio interface
+examples_arena = [
+    [
+        "A woman in a red dress singing on top of a building.",
+        "deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+        25,
+        7.5,
+        25,
+        7.5,
+        1024,
+        1024,
+        42,
+        2,
+        "sd3 medium",
+        "sdxl",
+        25, #prior_num_inference_steps_a
+        4.0, #prior_guidance_scale_a
+        12, #decoder_num_inference_steps_a
+        0.0, #decoder_guidance_scale_a
+        25, #prior_num_inference_steps_b
+        4.0, #prior_guidance_scale_b
+        12, #decoder_num_inference_steps_b
+        0.0 #decoder_guidance_scale_b
+    ],
+    [
+        "An astronaut on mars in a futuristic cyborg suit.",
+        "deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+        25,
+        7.5,
+        25,
+        7.5,
+        1024,
+        1024,
+        42,
+        2,
+        "sd3 medium",
+        "sdxl",
+        25, #prior_num_inference_steps_a
+        4.0, #prior_guidance_scale_a
+        12, #decoder_num_inference_steps_a
+        0.0, #decoder_guidance_scale_a
+        25, #prior_num_inference_steps_b
+        4.0, #prior_guidance_scale_b
+        12, #decoder_num_inference_steps_b
+        0.0 #decoder_guidance_scale_b
+    ],
+]
+examples_individual = [
+    [
+        "A woman in a red dress singing on top of a building.",
+        "deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+        25,
+        7.5,
+        1024,
+        1024,
+        42,
+        2,
+        "sdxl",
+        25, #prior_num_inference_steps
+        4.0, #prior_guidance_scale
+        12, #decoder_num_inference_steps
+        0.0 #decoder_guidance_scale
+    ],
+    [
+        "An astronaut on mars in a futuristic cyborg suit.",
+        "deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+        25,
+        7.5,
+        1024,
+        1024,
+        42,
+        2,
+        "sdxl",
+        25, #prior_num_inference_steps
+        4.0, #prior_guidance_scale
+        12, #decoder_num_inference_steps
+        0.0 #decoder_guidance_scale
+    ],
 ]
 css = """
                         info="Describe the image you want",
                         placeholder="A cat...",
                     )
+                    model_choice_a = gr.Dropdown(
+                        label="Stable Diffusion Model A",
+                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash", "stable cascade"],
                         value="sd3 medium",
                     )
+                    model_choice_b = gr.Dropdown(
+                        label="Stable Diffusion Model B",
+                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash", "stable cascade"],
                         value="sdxl",
                     )
                     run_button = gr.Button("Run")
+                result_1 = gr.Gallery(label="Generated Images (Model A)", elem_id="gallery_1")
+                result_2 = gr.Gallery(label="Generated Images (Model B)", elem_id="gallery_2")
             with gr.Accordion("Advanced options", open=False):
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    info="Describe what you don't want in the image",
+                    value="deformed, distorted, disfigured, poorly drawn, bad anatomy, incorrect anatomy, extra limb, missing limb, floating limbs, mutated hands and fingers, disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
+                    placeholder="Ugly, bad anatomy...",
+                )
                 with gr.Row():
+                    with gr.Column():
+                        num_inference_steps_a = gr.Slider(
+                            label="Inference Steps (Model A)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=50,
+                            value=25,
+                            step=1,
+                            visible=True
+                        )
+                        guidance_scale_a = gr.Slider(
+                            label="Guidance Scale (Model A)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=7.5,
+                            step=0.1,
+                            visible=True
+                        )
+                        prior_num_inference_steps_a = gr.Slider(
+                            label="Prior Inference Steps (Model A)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=50,
+                            value=25,
+                            step=1,
+                            visible=False
+                        )
+                        prior_guidance_scale_a = gr.Slider(
+                            label="Prior Guidance Scale (Model A)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=4.0,
+                            step=0.1,
+                            visible=False
+                        )
+                        decoder_num_inference_steps_a = gr.Slider(
+                            label="Decoder Inference Steps (Model A)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=15,
+                            value=15,
+                            step=1,
+                            visible=False
+                        )
+                        decoder_guidance_scale_a = gr.Slider(
+                            label="Decoder Guidance Scale (Model A)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=0.0,
+                            step=0.1,
+                            visible=False
+                        )
+                    with gr.Column():
+                        num_inference_steps_b = gr.Slider(
+                            label="Inference Steps (Model B)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=50,
+                            value=25,
+                            step=1,
+                            visible=True
+                        )
+                        guidance_scale_b = gr.Slider(
+                            label="Guidance Scale (Model B)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=7.5,
+                            step=0.1,
+                            visible=True
+                        )
+                        prior_num_inference_steps_b = gr.Slider(
+                            label="Prior Inference Steps (Model B)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=50,
+                            value=25,
+                            step=1,
+                            visible=False
+                        )
+                        prior_guidance_scale_b = gr.Slider(
+                            label="Prior Guidance Scale (Model B)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=4.0,
+                            step=0.1,
+                            visible=False
+                        )
+                        decoder_num_inference_steps_b = gr.Slider(
+                            label="Decoder Inference Steps (Model B)",
+                            info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                            minimum=1,
+                            maximum=15,
+                            value=12,
+                            step=1,
+                            visible=False
+                        )
+                        decoder_guidance_scale_b = gr.Slider(
+                            label="Decoder Guidance Scale (Model B)",
+                            info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                            minimum=0.0,
+                            maximum=10.0,
+                            value=0.0,
+                            step=0.1,
+                            visible=False
+                        )
                 with gr.Row():
                     width = gr.Slider(
                         label="Width",
                         value=2,
                     )
+            def toggle_visibility_arena_a(model_choice_a):
+                if model_choice_a == "stable cascade":
+                    return {
+                        num_inference_steps_a: gr.update(visible=False),
+                        guidance_scale_a: gr.update(visible=False),
+                        prior_num_inference_steps_a: gr.update(visible=True),
+                        prior_guidance_scale_a: gr.update(visible=True),
+                        decoder_num_inference_steps_a: gr.update(visible=True),
+                        decoder_guidance_scale_a: gr.update(visible=True),
+                    }
+                elif model_choice_a == "sdxl flash":
+                    return {
+                        num_inference_steps_a: gr.update(visible=True, maximum=15, value=8),
+                        guidance_scale_a: gr.update(visible=True, maximum=6.0, value=3.5),
+                        prior_num_inference_steps_a: gr.update(visible=False),
+                        prior_guidance_scale_a: gr.update(visible=False),
+                        decoder_num_inference_steps_a: gr.update(visible=False),
+                        decoder_guidance_scale_a: gr.update(visible=False),
+                    }
+                else:
+                    return {
+                        num_inference_steps_a: gr.update(visible=True, maximum=50, value=25),
+                        guidance_scale_a: gr.update(visible=True, maximum=10.0, value=7.5),
+                        prior_num_inference_steps_a: gr.update(visible=False),
+                        prior_guidance_scale_a: gr.update(visible=False),
+                        decoder_num_inference_steps_a: gr.update(visible=False),
+                        decoder_guidance_scale_a: gr.update(visible=False),
+                    }
+            def toggle_visibility_arena_b(model_choice_b):
+                if model_choice_b == "stable cascade":
+                    return {
+                        num_inference_steps_b: gr.update(visible=False),
+                        guidance_scale_b: gr.update(visible=False),
+                        prior_num_inference_steps_b: gr.update(visible=True),
+                        prior_guidance_scale_b: gr.update(visible=True),
+                        decoder_num_inference_steps_b: gr.update(visible=True),
+                        decoder_guidance_scale_b: gr.update(visible=True),
+                    }
+                elif model_choice_b == "sdxl flash":
+                    return {
+                        num_inference_steps_b: gr.update(visible=True, maximum=15, value=8),
+                        guidance_scale_b: gr.update(visible=True, maximum=6.0, value=3.5),
+                        prior_num_inference_steps_b: gr.update(visible=False),
+                        prior_guidance_scale_b: gr.update(visible=False),
+                        decoder_num_inference_steps_b: gr.update(visible=False),
+                        decoder_guidance_scale_b: gr.update(visible=False),
+                    }
+                else:
+                    return {
+                        num_inference_steps_b: gr.update(visible=True, maximum=50, value=25),
+                        guidance_scale_b: gr.update(visible=True, maximum=10.0, value=7.5),
+                        prior_num_inference_steps_b: gr.update(visible=False),
+                        prior_guidance_scale_b: gr.update(visible=False),
+                        decoder_num_inference_steps_b: gr.update(visible=False),
+                        decoder_guidance_scale_b: gr.update(visible=False),
+                    }
+            model_choice_a.change(
+                toggle_visibility_arena_a,
+                inputs=[model_choice_a],
+                outputs=[
+                    num_inference_steps_a,
+                    guidance_scale_a,
+                    prior_num_inference_steps_a,
+                    prior_guidance_scale_a,
+                    decoder_num_inference_steps_a,
+                    decoder_guidance_scale_a
+                ]
+            )
+            model_choice_b.change(
+                toggle_visibility_arena_b,
+                inputs=[model_choice_b],
+                outputs=[
+                    num_inference_steps_b,
+                    guidance_scale_b,
+                    prior_num_inference_steps_b,
+                    prior_guidance_scale_b,
+                    decoder_num_inference_steps_b,
+                    decoder_guidance_scale_b
+                ]
+            )
             gr.Examples(
+                examples=examples_arena,
+                inputs=[
+                    prompt,
+                    negative_prompt,
+                    num_inference_steps_a,
+                    guidance_scale_a,
+                    num_inference_steps_b,
+                    guidance_scale_b,
+                    height,
+                    width,
+                    seed,
+                    num_images_per_prompt,
+                    model_choice_a,
+                    model_choice_b,
+                    prior_num_inference_steps_a,
+                    prior_guidance_scale_a,
+                    decoder_num_inference_steps_a,
+                    decoder_guidance_scale_a,
+                    prior_num_inference_steps_b,
+                    prior_guidance_scale_b,
+                    decoder_num_inference_steps_b,
+                    decoder_guidance_scale_b,
+                ],
                 outputs=[result_1, result_2],
                 fn=generate_arena_images,
             )
                 inputs=[
                     prompt,
                     negative_prompt,
+                    num_inference_steps_a,
+                    guidance_scale_a,
+                    num_inference_steps_b,
+                    guidance_scale_b,
                     height,
+                    width,
                     seed,
                     num_images_per_prompt,
+                    model_choice_a,
+                    model_choice_b,
+                    prior_num_inference_steps_a,
+                    prior_guidance_scale_a,
+                    decoder_num_inference_steps_a,
+                    decoder_guidance_scale_a,
+                    prior_num_inference_steps_b,
+                    prior_guidance_scale_b,
+                    decoder_num_inference_steps_b,
+                    decoder_guidance_scale_b,
                 ],
                 outputs=[result_1, result_2],
             )
                     )
                     model_choice = gr.Dropdown(
                         label="Stable Diffusion Model",
+                        choices=["sd3 medium", "sd2.1", "sdxl", "sdxl flash", "stable cascade"],
                         value="sd3 medium",
                     )
                     run_button = gr.Button("Run")
                         maximum=50,
                         value=25,
                         step=1,
+                        visible=True
                     )
                     guidance_scale = gr.Slider(
                         label="Guidance Scale",
                         maximum=10.0,
                         value=7.5,
                         step=0.1,
+                        visible=True
+                    )
+                    prior_num_inference_steps = gr.Slider(
+                        label="Prior Inference Steps",
+                        info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                        minimum=1,
+                        maximum=50,
+                        value=25,
+                        step=1,
+                        visible=False
+                    )
+                    prior_guidance_scale = gr.Slider(
+                        label="Prior Guidance Scale",
+                        info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                        minimum=0.0,
+                        maximum=10.0,
+                        value=4.0,
+                        step=0.1,
+                        visible=False
+                    )
+                    decoder_num_inference_steps = gr.Slider(
+                        label="Decoder Inference Steps",
+                        info="The number of denoising steps of the image. More denoising steps usually lead to a higher quality image at the cost of slower inference",
+                        minimum=1,
+                        maximum=15,
+                        value=12,
+                        step=1,
+                        visible=False
+                    )
+                    decoder_guidance_scale = gr.Slider(
+                        label="Decoder Guidance Scale",
+                        info="Controls how much the image generation process follows the text prompt. Higher values make the image stick more closely to the input text.",
+                        minimum=0.0,
+                        maximum=10.0,
+                        value=0.0,
+                        step=0.1,
+                        visible=False
                     )
                 with gr.Row():
                     width = gr.Slider(
                         value=2,
                     )
+            def toggle_visibility_individual(model_choice):
+                if model_choice == "stable cascade":
+                    return {
+                        num_inference_steps: gr.update(visible=False),
+                        guidance_scale: gr.update(visible=False),
+                        prior_num_inference_steps: gr.update(visible=True),
+                        prior_guidance_scale: gr.update(visible=True),
+                        decoder_num_inference_steps: gr.update(visible=True),
+                        decoder_guidance_scale: gr.update(visible=True),
+                    }
+                elif model_choice == "sdxl flash":
+                    return {
+                        num_inference_steps: gr.update(visible=True, maximum=15, value=8),
+                        guidance_scale: gr.update(visible=True, maximum=6.0, value=3.5),
+                        prior_num_inference_steps: gr.update(visible=False),
+                        prior_guidance_scale: gr.update(visible=False),
+                        decoder_num_inference_steps: gr.update(visible=False),
+                        decoder_guidance_scale: gr.update(visible=False),
+                    }
+                else:
+                    return {
+                        num_inference_steps: gr.update(visible=True, maximum=50, value=25),
+                        guidance_scale: gr.update(visible=True, maximum=10.0, value=7.5),
+                        prior_num_inference_steps: gr.update(visible=False),
+                        prior_guidance_scale: gr.update(visible=False),
+                        decoder_num_inference_steps: gr.update(visible=False),
+                        decoder_guidance_scale: gr.update(visible=False),
+                    }
+            model_choice.change(
+                toggle_visibility_individual,
+                inputs=[model_choice],
+                outputs=[
+                    num_inference_steps,
+                    guidance_scale,
+                    prior_num_inference_steps,
+                    prior_guidance_scale,
+                    decoder_num_inference_steps,
+                    decoder_guidance_scale
+                ]
+            )
             gr.Examples(
+                examples=examples_individual,
+                inputs=[
+                    prompt,
+                    negative_prompt,
+                    num_inference_steps,
+                    guidance_scale,
+                    height,
+                    width,
+                    seed,
+                    num_images_per_prompt,
+                    model_choice,
+                    prior_num_inference_steps,
+                    prior_guidance_scale,
+                    decoder_num_inference_steps,
+                    decoder_guidance_scale,
+                ],
                 outputs=[result],
                 fn=generate_individual_image,
             )
                     prompt,
                     negative_prompt,
                     num_inference_steps,
                     guidance_scale,
+                    height,
+                    width,
                     seed,
                     num_images_per_prompt,
                     model_choice,
+                    prior_num_inference_steps,
+                    prior_guidance_scale,
+                    decoder_num_inference_steps,
+                    decoder_guidance_scale,
                 ],
                 outputs=[result],
             )