Spaces:

alfredplpl
/

sd3-with-LLM

Running on Zero

File size: 6,236 Bytes

cd58335
81d8196
42eccb2
c54d478
 
ec3c4e8
c54d478
a423cc4
718bcd6
c54d478
ec3c4e8
 
c54d478
0aa60cd
08c5790
c54d478
c0c6c3f
0aa60cd
c0c6c3f
 
 
 
0aa60cd
c0c6c3f
 
 
 
 
 
 
 
 
c23df4e
 
 
c0c6c3f
 
ec3c4e8
 
c54d478
94d1cb1
 
c0c6c3f
c23df4e
 
45df7f2
 
c23df4e
c0c6c3f
22e8896
c0c6c3f
c23df4e
 
c379910
 
 
 
 
4b94657
718bcd6
ec3c4e8
 
 
 
 
 
 
 
ea188c0
c54d478
 
93ab815
 
 
45df7f2
bc461ad
 
c54d478
 
ec3c4e8
 
 
 
 
 
 
c54d478
ec3c4e8
 
 
0aa60cd
ec3c4e8
 
c54d478
ec3c4e8
c54d478
93ab815
c54d478
 
93ab815
c54d478
 
ec3c4e8
93ab815
ec3c4e8
93ab815
ea188c0
 
93ab815
ec3c4e8
c54d478
93ab815
c54d478
93ab815
c54d478
ec3c4e8
 
93ab815
ec3c4e8
 
 
 
c54d478
ec3c4e8
93ab815
ec3c4e8
 
 
 
93ab815
ec3c4e8
 
 
0aa60cd
ec3c4e8
 
 
93ab815
ec3c4e8
 
 
0aa60cd
ec3c4e8
 
 
 
 
93ab815
ec3c4e8
 
 
0aa60cd
ec3c4e8
 
 
93ab815
ec3c4e8
 
 
 
 
 
 
 
 
 
c54d478
ec3c4e8
 
 
ea188c0
c54d478
 
ec3c4e8

# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
import spaces
import os
import gradio as gr
import numpy as np
import random
import torch
from diffusers import StableDiffusion3Pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

device = "cuda"
dtype = torch.float16

repo = "stabilityai/stable-diffusion-3.5-large"
t2i = StableDiffusion3Pipeline.from_pretrained(repo, torch_dtype=torch.bfloat16, token=os.environ["TOKEN"]).to(device)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct", 
    device_map="cuda", 
    torch_dtype=torch.bfloat16, 
    trust_remote_code=True, 
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
upsampler = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 300,
    "return_full_text": False,
    "temperature": 0.7,
    "do_sample": True,
    "top_p": 0.95
}

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1344

@spaces.GPU
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    messages = [
        {"role": "user", "content": "クールなアニメ風の女の子"},
        {"role": "assistant", "content": "An anime style illustration of a cool-looking teenage girl with an edgy, confident expression. She has piercing eyes, a slight smirk, and colorful hair that flows in the wind. She wears a trendy punk-inspired outfit with a leather jacket, ripped jeans, and combat boots. The background has an urban nighttime feel with city lights and graffiti to match her rebellious vibe. The colors are vibrant with high contrast to give an impactful look. The overall style captures her undeniable coolness and fearless attitude."},
        {"role": "user", "content": "実写風の女子高生"},
        {"role": "assistant", "content": "A photorealistic image of a female high school student standing on a city street. She is wearing a traditional Japanese school uniform, consisting of a navy blue blazer, a white blouse, and a knee-length plaid skirt. Her black hair is styled in a neat shoulder-length bob, and she carries a red backpack. The setting is an urban backdrop with cherry blossoms in bloom, suggesting early spring. The lighting is soft and natural, enhancing the realism of the scene."},
        {"role": "user", "content": prompt },
    ]
    output = upsampler(messages, **generation_args)
    upsampled_prompt=output[0]['generated_text']
    print(upsampled_prompt)
    
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
        
    generator = torch.Generator().manual_seed(seed)
    
    image = t2i(
        prompt = upsampled_prompt, 
        negative_prompt = negative_prompt,
        guidance_scale = guidance_scale, 
        num_inference_steps = num_inference_steps, 
        width = width, 
        height = height,
        generator = generator
    ).images[0] 
    
    return image, seed, upsampled_prompt

examples = [
    "美味しい肉",
    "馬に乗った宇宙飛行士",
    "アニメ風の美少女",
    "女子高生の写真",
    "寿司でできた家に入っているコーギー",
    "バナナとアボカドが戦っている様子"
]

css="""
#col-container {
    margin: 0 auto;
    max-width: 580px;
}
"""

with gr.Blocks(css=css) as demo:
    
    with gr.Column(elem_id="col-container"):
        gr.Markdown(f"""
        # 日本語が入力できる SD3.5 Large
        """)
        
        with gr.Row():
            
            prompt = gr.Text(
                label="プロンプト",
                show_label=False,
                max_lines=1,
                placeholder="作りたい画像の特徴を入力してください",
                container=False,
            )
            
            run_button = gr.Button("実行", scale=0)
        
        result = gr.Image(label="結果", show_label=False)
        generated_prompt = gr.Textbox(label="生成に使ったプロンプト", show_label=False, interactive=False)
        
        with gr.Accordion("詳細設定", open=False):
            
            negative_prompt = gr.Text(
                label="ネガティブプロンプト",
                max_lines=1,
                placeholder="画像から排除したい要素を入力してください",
            )
            
            seed = gr.Slider(
                label="乱数のシード",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            
            randomize_seed = gr.Checkbox(label="ランダム生成", value=True)
            
            with gr.Row():
                
                width = gr.Slider(
                    label="横",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024+512,
                )
                
                height = gr.Slider(
                    label="縦",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024+512,
                )
            
            with gr.Row():
                
                guidance_scale = gr.Slider(
                    label="プロンプトの忠実さ",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=3.5,
                )
                
                num_inference_steps = gr.Slider(
                    label="推論回数",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=28,
                )
        
        gr.Examples(
            examples = examples,
            inputs = [prompt]
        )
    gr.on(
        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
        fn = infer,
        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs = [result, seed, generated_prompt]
    )

demo.launch()