import gradio as gr
import os
import sys

import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline

# Report which device will be used for inference
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Load the model (half precision on GPU, full precision on CPU).
# Note: torch_dtype alone handles half precision; the deprecated
# revision="fp16" branch pattern is not needed and often does not exist.
model_name = os.environ.get("MODEL_NAME", "UnfilteredAI/NSFW-gen-v2")
try:
    print("Loading the model...")
    pipe = DiffusionPipeline.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    )
    pipe.to(device)  # Move the pipeline to GPU or CPU based on availability
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading the model: {e}")
    sys.exit(1)

# Initialize Compel for prompt weighting / textual embeddings (SDXL setup:
# both tokenizers and text encoders, pooled embeddings from the second one)
try:
    compel = Compel(
        tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
        text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True],
    )
    print("Compel initialized successfully!")
except Exception as e:
    print(f"Error initializing Compel: {e}")
    sys.exit(1)


# Generate a single scene image from one prompt
def generate_scene(prompt, num_inference_steps, guidance_scale, width, height):
    try:
        embeds, pooled = compel(prompt)  # Build prompt and pooled embeddings
        result = pipe(
            prompt_embeds=embeds,
            pooled_prompt_embeds=pooled,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
            width=int(width),
            height=int(height),
        )
        return result.images[0]  # Return the generated image
    except Exception as e:
        print(f"Error generating image for prompt '{prompt}': {e}")
        return None


# Generate a storyboard: one image per scene prompt, with a shared character token
def generate_storyboard(scene_prompts, num_inference_steps, guidance_scale, width, height, character_token):
    storyboard_images = []
    for scene_index, prompt in enumerate(scene_prompts):
        # Prepend the character token to every prompt for character consistency
        full_prompt = f"{character_token}, {prompt}"
        print(f"Generating scene {scene_index + 1}: {full_prompt}")
        image = generate_scene(full_prompt, num_inference_steps, guidance_scale, width, height)
        if image is not None:
            storyboard_images.append(image)  # Add the generated image to the storyboard
    return storyboard_images


# Gradio interface
def gradio_app():
    with gr.Blocks() as storyboard_interface:
        gr.Markdown("### Storyboard Generator with Consistent Characters")
        with gr.Row():
            character_token_input = gr.Textbox(
                label="Character Token (e.g., '@brave_warrior')",
                value="@brave_warrior",
            )
            num_inference_steps = gr.Number(value=25, label="Number of Inference Steps")
            guidance_scale = gr.Number(value=7.5, label="Guidance Scale")
            width = gr.Number(value=512, label="Image Width (default: 512)")
            height = gr.Number(value=512, label="Image Height (default: 512)")
        scene_prompts = gr.Textbox(
            label="Enter Scene Prompts (one per line)",
            placeholder=(
                "Scene 1: A brave warrior in shining armor, standing in an open field at sunrise.\n"
                "Scene 2: The warrior walks through a dense forest with sunlight streaming through the trees."
            ),
            lines=5,
        )
        storyboard_output = gr.Gallery(label="Generated Storyboard")
        generate_button = gr.Button("Generate Storyboard")

        # Connect the button to the storyboard generation function,
        # splitting the textbox into one prompt per non-empty line
        generate_button.click(
            fn=lambda prompts, steps, scale, w, h, token: generate_storyboard(
                [line.strip() for line in prompts.strip().split("\n") if line.strip()],
                steps, scale, w, h, token,
            ),
            inputs=[
                scene_prompts,
                num_inference_steps,
                guidance_scale,
                width,
                height,
                character_token_input,
            ],
            outputs=storyboard_output,
        )
    return storyboard_interface


# Launch the Gradio app
if __name__ == "__main__":
    gradio_app().launch(server_port=7860, share=True)
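# Example usage (a sketch, assuming this script is saved as app.py and that
# gradio, torch, diffusers, and compel are installed):
#
#   MODEL_NAME=UnfilteredAI/NSFW-gen-v2 python app.py
#
# The interface is served on port 7860; share=True additionally creates a
# temporary public Gradio link.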