Spaces:

joshuaberkowitzus
/

gemini-deep-research-text-to-image-demo

Running

App Files Files Community

gemini-deep-research-text-to-image-demo / app.py

joshuaberkowitzus

init

36c749c verified 3 months ago

raw

history blame contribute delete

4.5 kB

	import gradio as gr
	from diffusers import DiffusionPipeline
	import torch
	import os

	# Ensure necessary libraries are installed
	# pip install diffusers --upgrade
	# pip install invisible_watermark transformers accelerate safetensors gradio torch

	model_id = "stabilityai/stable-diffusion-xl-base-1.0"

	# CUDA devices with less total memory than this (in GiB) get model CPU
	# offloading. This is a rough example threshold; adjust as needed.
	LOW_VRAM_THRESHOLD_GB = 10

	# Determine device and dtype: half precision on CUDA, full precision on CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	    dtype = torch.float16
	    print("Using CUDA (GPU).")
	# elif torch.backends.mps.is_available():  # Uncomment for MacOS Metal support
	#     device = "mps"
	#     dtype = torch.float16
	#     print("Using MPS (Apple Silicon GPU).")
	else:
	    device = "cpu"
	    dtype = torch.float32
	    print("Using CPU.")

	# Load the Stable Diffusion XL pipeline at import time.
	# On any failure `pipe` is set to None so that the request handler can report
	# the problem instead of the whole app crashing on startup.
	try:
	    pipe = DiffusionPipeline.from_pretrained(
	        model_id,
	        torch_dtype=dtype,
	        use_safetensors=True,
	        # Only request the fp16 weight variant when not running on CPU.
	        variant="fp16" if device != "cpu" else None,
	    )

	    # Decide on CPU offloading BEFORE placing the pipeline on the GPU.
	    # Moving the full pipeline to CUDA first can run out of memory on the
	    # very low-VRAM devices that offloading is meant to help.
	    offload_enabled = False
	    if device == "cuda":
	        try:
	            total_vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
	            if total_vram_gb < LOW_VRAM_THRESHOLD_GB:
	                print(f"Low VRAM ({total_vram_gb:.2f}GB detected). Enabling model CPU offload.")
	                pipe.enable_model_cpu_offload()
	                offload_enabled = True
	        except Exception as offload_err:
	            # Best effort: fall through to the plain device placement below.
	            print(f"Could not check VRAM or enable offload: {offload_err}")

	    if not offload_enabled:
	        pipe.to(device)

	    # Optional: Use torch.compile for speedup (requires torch >= 2.0)
	    # if device != "cpu" and hasattr(torch, "compile"):
	    #     try:
	    #         print("Attempting to compile the UNet...")
	    #         pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
	    #         print("UNet compiled successfully.")
	    #     except Exception as compile_err:
	    #         print(f"Torch compile failed: {compile_err}")

	    print(f"SDXL pipeline loaded successfully on {device}.")
	except Exception as e:
	    print(f"Error loading SDXL pipeline: {e}")
	    pipe = None

	def _error_image(message):
	    """Return a 512x512 grey placeholder image with *message* drawn in red."""
	    # Imported locally because the file has no top-level PIL import.
	    from PIL import Image, ImageDraw, ImageFont

	    img = Image.new("RGB", (512, 512), color=(200, 200, 200))
	    draw = ImageDraw.Draw(img)
	    try:
	        font = ImageFont.truetype("arial.ttf", 15)
	    except OSError:
	        # Fall back to PIL's built-in font when arial.ttf cannot be opened.
	        font = ImageFont.load_default()
	    draw.text((10, 10), message, fill=(255, 0, 0), font=font)
	    return img


	def generate_image(prompt):
	    """Generate a single image for a text prompt.

	    Args:
	        prompt: Text description of the desired image.

	    Returns:
	        The first image produced by the module-level ``pipe``; a placeholder
	        image carrying an error message when the pipeline failed to load or
	        generation raised; or ``None`` when the prompt is empty or contains
	        only whitespace.
	    """
	    if pipe is None:
	        # The pipeline could not be loaded at startup; show that to the user.
	        return _error_image("Error: Model pipeline failed to load.")

	    if not prompt or not prompt.strip():
	        return None  # Nothing to generate for an empty prompt

	    print(f"Generating image for prompt: '{prompt}'")
	    try:
	        # Default guidance scale; the step count can be customized.
	        with torch.inference_mode():  # No autograd bookkeeping during inference
	            result = pipe(prompt=prompt, num_inference_steps=30)
	        # `.images` is a list; the UI has a single image output, so return
	        # its first element instead of the whole list.
	        image = result.images[0]
	    except Exception as e:
	        print(f"Error during image generation: {e}")
	        return _error_image(f"Error generating image:\n{e}")

	    print("Image generated successfully.")
	    return image


	# Assemble the Gradio UI: one text box in, one PIL image out, wired to
	# generate_image().
	prompt_box = gr.Textbox(
	    label="Enter Text Prompt",
	    placeholder="e.g., 'An astronaut riding a green horse'",
	)
	image_output = gr.Image(label="Generated Image", type="pil")

	demo = gr.Interface(
	    fn=generate_image,
	    inputs=prompt_box,
	    outputs=image_output,
	    title="Text-to-Image Generation with Stable Diffusion XL",
	    description=(
	        f"Generate images from text prompts using the {model_id} model. "
	        "Loading and inference might take a moment, especially on the first run or on CPU."
	    ),
	    examples=["A high-tech cityscape at sunset, cinematic lighting"],
	)

	if __name__ == "__main__":
	    # Start the web app only when this file is run as a script.
	    demo.launch(debug=True)