| import gradio as gr |
| import spaces |
| import torch |
| from diffusers import StableDiffusionPipeline |
| from PIL import Image |
| import numpy as np |
| import imageio |
| import tempfile |
| import os |
|
|
# Hugging Face Hub identifier of the Stable Diffusion checkpoint to load.
MODEL_ID = "stabilityai/stable-diffusion-2"



# Module-level pipeline cache. Left as None until the first request so the
# (large) model download/load happens lazily in initialize_pipeline().
pipe = None
|
|
|
|
def initialize_pipeline():
    """Return the module-level Stable Diffusion pipeline, loading it on demand.

    The first call downloads/loads MODEL_ID — onto CUDA with fp16 weights when
    a GPU is available, otherwise onto CPU with fp32 — and caches it in the
    module-level ``pipe``. Later calls return the cached instance unchanged.
    """
    global pipe
    if pipe is not None:
        return pipe

    has_cuda = torch.cuda.is_available()
    device = "cuda" if has_cuda else "cpu"
    print(f"Initializing pipeline on device: {device}")

    # fp16 halves memory on GPU; CPU inference requires fp32.
    dtype = torch.float16 if has_cuda else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
    ).to(device)
    return pipe
|
|
|
|
@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """Generate an image from text and a video of the denoising process.

    Args:
        prompt: Text description of the desired image.
        seed: RNG seed (any numeric; coerced to int) for reproducibility.
        num_inference_steps: Number of denoising steps (coerced to int).

    Returns:
        Tuple of (final PIL.Image, filesystem path to an .mp4 whose frames
        are the decoded latents after each diffusion step).
    """
    pipeline = initialize_pipeline()
    device = pipeline.device

    # Seed on the pipeline's own device so results are reproducible.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    # One decoded frame per denoising step, collected by the callback below.
    frames = []

    def callback(step: int, timestep: int, latents):
        # NOTE(review): decode_latents/numpy_to_pil are legacy diffusers
        # helpers (the `callback` kwarg is deprecated in favor of
        # `callback_on_step_end`) — confirm the pinned diffusers version
        # still supports them.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
            frames.append(image)

    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,
        )

    final_image = result.images[0]

    # Robustness: if the installed diffusers version never invoked the legacy
    # callback, `frames` is empty and mimsave would raise — fall back to a
    # one-frame video of the final image.
    if not frames:
        frames.append(final_image)

    # imageio's ffmpeg writer expects ndarray frames; PIL Images only work
    # incidentally in some imageio versions, so convert explicitly.
    video_frames = [np.asarray(frame) for frame in frames]

    # delete=False: Gradio must be able to read the file after this handler
    # returns; temp-dir cleanup reclaims it later. Write after the file is
    # closed so the path is also valid on platforms with exclusive opens.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    imageio.mimsave(video_path, video_frames, fps=5)

    return final_image, video_path
|
|
|
|
def create_interface():
    """Build and return the Gradio Interface wired to generate_image."""
    # Input widgets, named for readability before assembly.
    prompt_input = gr.Textbox(
        label="Prompt",
        placeholder="Enter a text description of the image you want to generate...",
        lines=3,
    )
    seed_input = gr.Slider(
        minimum=0,
        maximum=1000000,
        randomize=True,
        step=1,
        label="Seed",
        info="Random seed for reproducibility",
    )
    steps_input = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Diffusion Steps",
        info="Number of denoising steps (more steps = higher quality but slower)",
    )

    # Outputs: the final image plus the per-step denoising video.
    image_output = gr.Image(label="Generated Image", type="pil")
    video_output = gr.Video(label="Diffusion Steps Video")

    return gr.Interface(
        fn=generate_image,
        inputs=[prompt_input, seed_input, steps_input],
        outputs=[image_output, video_output],
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=[
            ["A beautiful sunset over mountains", 42213, 50],
            ["A dog wearing a space suit, floating in space, hand-drawn illustration", 83289, 20],
            ["Cyberpunk city at night, neon lights", 12056, 40],
        ],
        cache_examples=False,
    )
|
|
|
|
if __name__ == "__main__":

    # Bind to 0.0.0.0 so the app is reachable from outside a container /
    # Hugging Face Space; 7860 is the conventional Gradio port.
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
|
|