Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,236 Bytes
cd58335 81d8196 42eccb2 c54d478 ec3c4e8 c54d478 a423cc4 718bcd6 c54d478 ec3c4e8 c54d478 0aa60cd 08c5790 c54d478 c0c6c3f 0aa60cd c0c6c3f 0aa60cd c0c6c3f c23df4e c0c6c3f ec3c4e8 c54d478 94d1cb1 c0c6c3f c23df4e 45df7f2 c23df4e c0c6c3f 22e8896 c0c6c3f c23df4e c379910 4b94657 718bcd6 ec3c4e8 ea188c0 c54d478 93ab815 45df7f2 bc461ad c54d478 ec3c4e8 c54d478 ec3c4e8 0aa60cd ec3c4e8 c54d478 ec3c4e8 c54d478 93ab815 c54d478 93ab815 c54d478 ec3c4e8 93ab815 ec3c4e8 93ab815 ea188c0 93ab815 ec3c4e8 c54d478 93ab815 c54d478 93ab815 c54d478 ec3c4e8 93ab815 ec3c4e8 c54d478 ec3c4e8 93ab815 ec3c4e8 93ab815 ec3c4e8 0aa60cd ec3c4e8 93ab815 ec3c4e8 0aa60cd ec3c4e8 93ab815 ec3c4e8 0aa60cd ec3c4e8 93ab815 ec3c4e8 c54d478 ec3c4e8 ea188c0 c54d478 ec3c4e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
# Thanks: https://huggingface.co/spaces/stabilityai/stable-diffusion-3-medium
import spaces
import os
import gradio as gr
import numpy as np
import random
import torch
from diffusers import StableDiffusion3Pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# ---------------------------------------------------------------------------
# Module-level setup: both models are loaded once, at import time.
# ---------------------------------------------------------------------------
device = "cuda"
# NOTE(review): `dtype` is not referenced anywhere below; both models are
# loaded with torch.bfloat16 passed explicitly.
dtype = torch.float16
repo = "stabilityai/stable-diffusion-3.5-large"

# Text-to-image pipeline. The access token is read from the TOKEN environment
# variable; os.environ[...] raises KeyError when it is not set.
t2i = StableDiffusion3Pipeline.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,
    token=os.environ["TOKEN"],
).to(device)

# Instruction-tuned LLM that `infer` uses to rewrite the user's prompt before
# it is handed to the image pipeline.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
upsampler = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Keyword arguments forwarded to every `upsampler(...)` call.
generation_args = dict(
    max_new_tokens=300,
    return_full_text=False,
    temperature=0.7,
    do_sample=True,
    top_p=0.95,
)

# Upper bounds used by the UI sliders and by random seed selection.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1344
@spaces.GPU
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
    """Rewrite ``prompt`` with the LLM, then render one image from the result.

    Args:
        prompt: User text; sent to ``upsampler`` as the final user turn after
            two fixed Japanese-request / English-answer example exchanges.
        negative_prompt: Forwarded unchanged to the image pipeline.
        seed: Seed for the torch generator when ``randomize_seed`` is falsy.
        randomize_seed: When truthy, ``seed`` is replaced by a random integer
            in ``[0, MAX_SEED]``.
        width: Forwarded to the image pipeline.
        height: Forwarded to the image pipeline.
        guidance_scale: Forwarded to the image pipeline.
        num_inference_steps: Forwarded to the image pipeline.
        progress: Not referenced in the body (presumably lets Gradio mirror
            tqdm progress in the UI -- confirm against the Gradio docs).

    Returns:
        A tuple ``(image, seed, upsampled_prompt)``: the first generated
        image, the seed actually used, and the LLM-written prompt.
    """
    # Few-shot pairs: (user request, assistant answer).
    few_shot_pairs = [
        (
            # "A cool anime-style girl"
            "クールなアニメ風の女の子",
            "An anime style illustration of a cool-looking teenage girl with an edgy, confident expression. She has piercing eyes, a slight smirk, and colorful hair that flows in the wind. She wears a trendy punk-inspired outfit with a leather jacket, ripped jeans, and combat boots. The background has an urban nighttime feel with city lights and graffiti to match her rebellious vibe. The colors are vibrant with high contrast to give an impactful look. The overall style captures her undeniable coolness and fearless attitude.",
        ),
        (
            # "A live-action-style high-school girl"
            "実写風の女子高生",
            "A photorealistic image of a female high school student standing on a city street. She is wearing a traditional Japanese school uniform, consisting of a navy blue blazer, a white blouse, and a knee-length plaid skirt. Her black hair is styled in a neat shoulder-length bob, and she carries a red backpack. The setting is an urban backdrop with cherry blossoms in bloom, suggesting early spring. The lighting is soft and natural, enhancing the realism of the scene.",
        ),
    ]
    chat = []
    for request, answer in few_shot_pairs:
        chat.append({"role": "user", "content": request})
        chat.append({"role": "assistant", "content": answer})
    chat.append({"role": "user", "content": prompt})

    completions = upsampler(chat, **generation_args)
    upsampled_prompt = completions[0]["generated_text"]
    print(upsampled_prompt)

    seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator().manual_seed(seed)

    sampling_kwargs = {
        "prompt": upsampled_prompt,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": rng,
    }
    first_image = t2i(**sampling_kwargs).images[0]
    return first_image, seed, upsampled_prompt
# Sample prompts (Japanese) offered as one-click examples in the UI.
examples = [
    "美味しい肉",  # delicious meat
    "馬に乗った宇宙飛行士",  # an astronaut riding a horse
    "アニメ風の美少女",  # an anime-style beautiful girl
    "女子高生の写真",  # a photo of a high-school girl
    "寿司でできた家に入っているコーギー",  # a corgi inside a house made of sushi
    "バナナとアボカドが戦っている様子",  # a banana and an avocado fighting
]
# Stylesheet handed to gr.Blocks: centres the element with id "col-container"
# and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "margin: 0 auto;\n"
    "max-width: 580px;\n"
    "}\n"
)
# Gradio UI: a single centred column with the prompt box, the result image,
# the LLM-written prompt, an "advanced settings" accordion and example prompts.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the order of the `with` statements -- confirm the layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("\n# 日本語が入力できる SD3.5 Large\n")

        with gr.Row():
            prompt = gr.Text(
                label="プロンプト",
                show_label=False,
                max_lines=1,
                placeholder="作りたい画像の特徴を入力してください",
                container=False,
            )
            run_button = gr.Button("実行", scale=0)

        result = gr.Image(label="結果", show_label=False)
        # Read-only box showing the prompt that `infer` actually sent to the
        # image pipeline.
        generated_prompt = gr.Textbox(label="生成に使ったプロンプト", show_label=False, interactive=False)

        with gr.Accordion("詳細設定", open=False):
            negative_prompt = gr.Text(
                label="ネガティブプロンプト",
                max_lines=1,
                placeholder="画像から排除したい要素を入力してください",
            )
            seed = gr.Slider(
                label="乱数のシード",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="ランダム生成", value=True)

            with gr.Row():
                # The default was 1024+512 (= 1536), which is above the
                # slider's own maximum (MAX_IMAGE_SIZE). 1024 lies inside
                # [256, MAX_IMAGE_SIZE] and on the 64-pixel step grid.
                width = gr.Slider(
                    label="横",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )
                height = gr.Slider(
                    label="縦",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1024,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="プロンプトの忠実さ",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=3.5,
                )
                num_inference_steps = gr.Slider(
                    label="推論回数",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=28,
                )

        gr.Examples(
            examples=examples,
            inputs=[prompt],
        )

    # Run `infer` on button click or when Enter is pressed in either text box.
    # The `seed` slider is also an output, so it displays the seed that was
    # actually used.
    gr.on(
        triggers=[run_button.click, prompt.submit, negative_prompt.submit],
        fn=infer,
        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed, generated_prompt],
    )

demo.launch()