stable-diffusion-3.5-large-turbo

File size: 4,692 Bytes

ad22c5c
 
 
 
 
 
371ea68
ad22c5c
 
 
371ea68
b326fe8
46528f5
ad22c5c
 
 
 
 
 
371ea68
b326fe8
46528f5
b326fe8
ad22c5c
 
 
237ee28
ad22c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5188b2b
ad22c5c
237ee28
ad22c5c
 
237ee28
 
ad22c5c
 
 
 
 
 
 
 
 
237ee28
ad22c5c
237ee28
 
ad22c5c
237ee28
ad22c5c

import gradio as gr
import numpy as np
import random

import spaces
from diffusers import DiffusionPipeline
from transformers import T5EncoderModel, CLIPTextModelWithProjection
import torch

# Hub repositories: the base diffusion model plus the three replacement
# text encoders that are swapped into it below.
model_repo_id = "stabilityai/stable-diffusion-3.5-large-turbo"
text_encoder_repo = "silveroxides/CLIP_L_Fur"
text_encoder_2_repo = "silveroxides/SeaArtFurryCLIP_G"
text_encoder_3_repo = "silveroxides/t5xxl_flan_enc"

# Run on the GPU with bfloat16 weights when CUDA is present; otherwise fall
# back to float32 on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if device == "cuda" else torch.float32

# Load each text encoder separately so it can be handed to the pipeline in
# place of the one bundled with the base model repository.
text_encoder = CLIPTextModelWithProjection.from_pretrained(
    text_encoder_repo,
    torch_dtype=torch_dtype,
)
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    text_encoder_2_repo,
    torch_dtype=torch_dtype,
)
text_encoder_3 = T5EncoderModel.from_pretrained(
    text_encoder_3_repo,
    torch_dtype=torch_dtype,
)

# Assemble the pipeline around the custom encoders and move it to the
# selected device.
pipe = DiffusionPipeline.from_pretrained(
    model_repo_id,
    text_encoder=text_encoder,
    text_encoder_2=text_encoder_2,
    text_encoder_3=text_encoder_3,
    torch_dtype=torch_dtype,
).to(device)

# Upper bounds used by the seed and width/height sliders in the UI.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1216

@spaces.GPU
def infer(
    prompt,
    negative_prompt="",
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=0.0,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the module-level pipeline once and return the first image.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the random generator; ignored when
            ``randomize_seed`` is true.
        randomize_seed: When true, replace ``seed`` with a value drawn
            from ``[0, MAX_SEED]``.
        width: Requested image width, forwarded to the pipeline.
        height: Requested image height, forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Step count forwarded to the pipeline.
        progress: Gradio progress object; not referenced in the body.

    Returns:
        A ``(image, seed)`` tuple: the first image of the pipeline output
        and the seed that was actually used, so the caller can display it.
    """
    # Only draw a new seed on request; otherwise keep the caller's value.
    seed = random.randint(0, MAX_SEED) if randomize_seed else seed

    # A freshly seeded generator per call makes a given seed reproducible.
    rng = torch.Generator().manual_seed(seed)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=rng,
    )

    return output.images[0], seed


# Prompts offered as clickable examples in the UI.
examples = [
    "A capybara wearing a suit holding a sign that reads Hello World",
]

# Page stylesheet: centres the element with id "col-container" and caps
# its width at 640px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    # Everything lives in one column tagged "col-container", the id the
    # stylesheet passed above targets.
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # [Stable Diffusion 3.5 Large Turbo (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo)")
        gr.Markdown("Space for testing alternative text encoders with SD 3.5 L Turbo")

        # Prompt box and the button that starts a generation.
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                lines=4,
                max_lines=4,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        # Output image.
        result = gr.Image(label="Result", show_label=False)

        # Remaining generation parameters, collapsed by default.
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Textbox(
                label="Negative prompt",
                lines=2,
                max_lines=2,
                placeholder="Enter a negative prompt",
                visible=True,
            )

            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # Width and height share the same range, step and default.
            size_limits = dict(minimum=512, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
            with gr.Row():
                width = gr.Slider(label="Width", **size_limits)
                height = gr.Slider(label="Height", **size_limits)

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale", minimum=0.0, maximum=7.5, step=0.1, value=0.0
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps", minimum=1, maximum=50, step=1, value=4
                )

        # Example prompts supply only `prompt`; `infer` falls back to its
        # own defaults for every other argument.
        gr.Examples(
            examples=examples,
            inputs=[prompt],
            outputs=[result, seed],
            fn=infer,
            cache_examples=True,
            cache_mode="lazy",
        )

    # Clicking "Run" or submitting the prompt box both call `infer`. The input
    # list follows the order of `infer`'s parameters, and the seed it returns
    # is written back to the seed slider.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt, negative_prompt, seed, randomize_seed,
            width, height, guidance_scale, num_inference_steps,
        ],
        outputs=[result, seed],
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()