File size: 4,098 Bytes
b13990e
65fd06d
 
 
 
 
b7fbd38
504fb8a
 
65fd06d
b13990e
65fd06d
 
 
7f11b82
65fd06d
b13990e
 
 
65fd06d
 
b13990e
cad2673
b13990e
 
 
 
 
65fd06d
 
 
 
 
 
 
 
b13990e
 
88cc598
 
0810225
 
88cc598
65fd06d
 
504fb8a
7db3ca3
 
 
 
 
 
 
504fb8a
 
 
 
 
 
 
 
65fd06d
108abb9
895e905
 
 
611d1ae
9aa1c3d
895e905
 
108abb9
 
 
 
 
 
 
 
 
 
 
 
504fb8a
65fd06d
108abb9
 
 
 
ec93dcb
108abb9
 
 
 
 
 
65fd06d
108abb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b13990e
 
108abb9
b13990e
108abb9
 
b13990e
 
108abb9
b13990e
65fd06d
 
 
 
 
 
 
7db3ca3
 
7f11b82
65fd06d
 
 
 
 
 
 
8790f79
504fb8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

#!/usr/bin/env python

import base64
import hmac
import io
import os

import gradio as gr
import numpy as np
import PIL.Image
import torch
from diffusers import LCMScheduler, AutoPipelineForText2Image

# Upper bound of the seed slider: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
# Largest width/height offered by the sliders; overridable via the environment.
MAX_IMAGE_SIZE = int(os.environ.get('MAX_IMAGE_SIZE', '1024'))
# Shared secret that `generate` compares against the caller-supplied token.
SECRET_TOKEN = os.environ.get('SECRET_TOKEN', 'default_secret')

# Checkpoint passed to `from_pretrained` and the LoRA adapter loaded into it.
MODEL_ID = "segmind/SSD-1B"
ADAPTER_ID = "latent-consistency/lcm-lora-ssd-1b"

_has_cuda = torch.cuda.is_available()
device = torch.device('cuda:0') if _has_cuda else torch.device('cpu')

# The pipeline is only built when CUDA is available; otherwise `pipe` stays None.
pipe = None
if _has_cuda:
    pipe = AutoPipelineForText2Image.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    pipe.to("cuda")

    # Load the adapter weights, then fuse them into the pipeline.
    pipe.load_lora_weights(ADAPTER_ID)
    pipe.fuse_lora()

def generate(prompt: str,
             negative_prompt: str = '',
             seed: int = 0,
             width: int = 1024,
             height: int = 1024,
             guidance_scale: float = 0.0,
             num_inference_steps: int = 4,
             secret_token: str = '') -> str:
    """Generate one image for ``prompt`` and return it as a base64 PNG string.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the random generator; coerced to ``int``.
        width: Output width in pixels; coerced to ``int``.
        height: Output height in pixels; coerced to ``int``.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of inference steps; coerced to ``int``.
        secret_token: Caller-supplied token; must equal ``SECRET_TOKEN``.

    Returns:
        The first generated image, PNG-encoded and then base64-encoded,
        as a plain string (no ``data:`` URI prefix).

    Raises:
        gr.Error: If ``secret_token`` does not match ``SECRET_TOKEN``, or if
            the module-level ``pipe`` is ``None`` (it is only created when
            CUDA is available).
    """
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments; non-str tokens are simply treated as invalid.
    token_ok = isinstance(secret_token, str) and hmac.compare_digest(
        secret_token.encode('utf-8'), SECRET_TOKEN.encode('utf-8'))
    if not token_ok:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    # Without this guard, calling None would surface as an opaque TypeError.
    if pipe is None:
        raise gr.Error(
            'The image generation pipeline is not loaded (no CUDA device available).')

    # Slider/API values can arrive as floats (e.g. 4.0); manual_seed and the
    # size/step arguments need real integers.
    generator = torch.Generator().manual_seed(int(seed))

    image = pipe(prompt=prompt,
                 negative_prompt=negative_prompt,
                 width=int(width),
                 height=int(height),
                 guidance_scale=guidance_scale,
                 num_inference_steps=int(num_inference_steps),
                 generator=generator,
                 output_type='pil').images[0]

    # Serialize the PIL image to PNG bytes in memory.
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")

    # Base64 output is pure ASCII, so decoding as ASCII is always safe.
    return base64.b64encode(buffered.getvalue()).decode('ascii')

# UI definition. The page is covered by a full-screen notice because the space
# is meant to be called through the API endpoint registered below, not by hand.
with gr.Blocks() as demo:
    gr.HTML("""
    <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
        <div style="text-align: center; color: black;">
            <p style="color: black;">This space is a REST API to programmatically generate images using LCM LoRA SSD-1B.</p>
            <p style="color: black;">It is not meant to be directly used through a user interface, but using code and an access key.</p>
        </div>
    </div>""")
    secret_token = gr.Text(
        label='Secret Token',
        max_lines=1,
        placeholder='Enter your secret token',
    )
    prompt = gr.Text(
        label='Prompt',
        show_label=False,
        max_lines=1,
        placeholder='Enter your prompt',
        container=False,
    )
    # `generate` returns a bare base64-encoded PNG string, so the result is
    # exposed as text. The previous gr.Image(type="base64") used a `type`
    # value outside the documented set ("numpy", "pil", "filepath"), and an
    # image output cannot render a raw base64 string anyway.
    result = gr.Text(label='Result', show_label=False)

    negative_prompt = gr.Text(
        label='Negative prompt',
        max_lines=1,
        placeholder='Enter a negative prompt',
        visible=True,
    )
    seed = gr.Slider(label='Seed',
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0)

    width = gr.Slider(
        label='Width',
        minimum=256,
        maximum=MAX_IMAGE_SIZE,
        step=32,
        value=1024,
    )
    height = gr.Slider(
        label='Height',
        minimum=256,
        maximum=MAX_IMAGE_SIZE,
        step=32,
        value=1024,
    )
    guidance_scale = gr.Slider(
        label='Guidance scale',
        minimum=0,
        maximum=2,
        step=0.1,
        value=0.0)
    num_inference_steps = gr.Slider(
        label='Number of inference steps',
        minimum=1,
        maximum=8,
        step=1,
        value=4)

    # Order must match the positional parameters of `generate`.
    inputs = [
        prompt,
        negative_prompt,
        seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
        secret_token,
    ]
    # Submitting the prompt runs `generate`; api_name registers the endpoint
    # under the name 'run'.
    prompt.submit(
        fn=generate,
        inputs=inputs,
        outputs=result,
        api_name='run',
    )

# Enable the request queue with a maximum size of 32, then start the app.
demo.queue(max_size=32).launch()