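"""Gradio Space for testing alternative text encoders with Stable Diffusion 3.5 Large Turbo."""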
import gradio as gr
import numpy as np
import random
import spaces
from diffusers import DiffusionPipeline
from transformers import T5EncoderModel, CLIPTextModelWithProjection
import torch
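# Prefer the GPU when one is available.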
device = "cuda" if torch.cuda.is_available() else "cpu"
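# Checkpoints for the three replacement text encoders (CLIP-L, CLIP-G, T5-XXL).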
text_encoder_repo = "silveroxides/CLIP_L_Fur"
text_encoder_2_repo = "silveroxides/SeaArtFurryCLIP_G"
text_encoder_3_repo = "silveroxides/t5xxl_flan_enc"
model_repo_id = "stabilityai/stable-diffusion-3.5-large-turbo"
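# Use bfloat16 on GPU to save memory; fall back to float32 on CPU.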
if torch.cuda.is_available():
    torch_dtype = torch.bfloat16
else:
    torch_dtype = torch.float32
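# Load the replacement encoders, then assemble the SD 3.5 pipeline around them.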
text_encoder = CLIPTextModelWithProjection.from_pretrained(text_encoder_repo, torch_dtype=torch_dtype)
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(text_encoder_2_repo, torch_dtype=torch_dtype)
text_encoder_3 = T5EncoderModel.from_pretrained(text_encoder_3_repo, torch_dtype=torch_dtype)
pipe = DiffusionPipeline.from_pretrained(
    model_repo_id,
    text_encoder=text_encoder,
    text_encoder_2=text_encoder_2,
    text_encoder_3=text_encoder_3,
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device)
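# Largest seed the UI slider accepts, and the cap on output resolution.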
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1216
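# On Hugging Face ZeroGPU Spaces, @spaces.GPU requests a GPU for each call.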
@spaces.GPU
def infer(
    prompt,
    negative_prompt="",
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=0.0,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
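    """Generate a single image and return it along with the seed actually used."""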
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed)
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    ).images[0]
    return image, seed
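
# Example prompt surfaced in the gr.Examples widget.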
examples = [
"A capybara wearing a suit holding a sign that reads Hello World",
]
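
# Center the layout and cap its width.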
css = """
#col-container {
margin: 0 auto;
max-width: 640px;
}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # [Stable Diffusion 3.5 Large Turbo (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo)")
        gr.Markdown("Space for testing alternative text encoders with SD 3.5 L Turbo")
        with gr.Row():
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                max_lines=4,
                lines=4,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Result", show_label=False)
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Textbox(
                label="Negative prompt",
                max_lines=2,
                lines=2,
                placeholder="Enter a negative prompt",
                visible=True,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
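            # Output resolution controls (multiples of 32, capped at MAX_IMAGE_SIZE).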
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
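            # Defaults of 0.0 guidance and 4 steps match the distilled Turbo checkpoint.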
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=7.5,
                    step=0.1,
                    value=0.0,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                )
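        # Cached example; "lazy" caching renders it on first use rather than at startup.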
        gr.Examples(
            examples=examples,
            inputs=[prompt],
            outputs=[result, seed],
            fn=infer,
            cache_examples=True,
            cache_mode="lazy",
        )
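    # Run inference when the button is clicked or the prompt is submitted.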
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )
if __name__ == "__main__":
    demo.launch()