from typing import Tuple

import os
import uuid
import random
import numpy as np
import gradio as gr
import spaces
import torch
from PIL import Image
from diffusers import FluxInpaintPipeline
from gradio_client import Client, handle_file

# Read the Hugging Face token from the environment
HF_TOKEN = os.environ.get("HF_TOKEN", None)

MARKDOWN = """
# FLUX.1 Inpainting with Text-guided Mask 🔥

Shoutout to the [Black Forest Labs](https://huggingface.co/black-forest-labs) team for creating this amazing model,
and a big thanks to [Gothos](https://github.com/Gothos) for taking it to the next level by enabling inpainting with FLUX.
"""

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Use the Gradio Python client to query the EVF-SAM demo, hosted on Spaces, as an endpoint
client = Client("ysharma/evf-sam", hf_token=HF_TOKEN)

pipe = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16).to(DEVICE)


def resize_image_dimensions(
    original_resolution_wh: Tuple[int, int],
    maximum_dimension: int = MAX_IMAGE_SIZE
) -> Tuple[int, int]:
    # Scale the image so its longest side fits within maximum_dimension,
    # then round both sides down to multiples of 32 as FLUX expects.
    width, height = original_resolution_wh

    if width <= maximum_dimension and height <= maximum_dimension:
        width = width - (width % 32)
        height = height - (height % 32)
        return width, height

    if width > height:
        scaling_factor = maximum_dimension / width
    else:
        scaling_factor = maximum_dimension / height

    new_width = int(width * scaling_factor)
    new_height = int(height * scaling_factor)

    new_width = new_width - (new_width % 32)
    new_height = new_height - (new_height % 32)

    return new_width, new_height


def evf_sam_mask(image, prompt):
    # Save the input image to a temporary file and query the EVF-SAM
    # endpoint to generate a segmentation mask from the text prompt.
    filename = str(uuid.uuid4()) + ".jpg"
    image.save(filename)
    images = client.predict(
        image_np=handle_file(filename),
        prompt=prompt,
        api_name="/predict")
    # Open the returned mask image
    webp_image = Image.open(images[1])
    # Convert to RGB mode if it's not already
    if webp_image.mode != 'RGB':
        webp_image = webp_image.convert('RGB')
    # Copy it into a fresh PIL Image object
    pil_image = Image.new('RGB', webp_image.size)
    pil_image.paste(webp_image)
    return pil_image


@spaces.GPU(duration=150)
def process(
    input_image_editor: dict,
    input_text: str,
    inpaint_text: str,
    seed_slicer: int,
    randomize_seed_checkbox: bool,
    strength_slider: float,
    num_inference_steps_slider: int,
    progress=gr.Progress(track_tqdm=True)
):
    if not input_text:
        gr.Info("Please enter a text prompt.")
        return None, None

    image = input_image_editor['background']
    if not image:
        gr.Info("Please upload an image.")
        return None, None

    # Generate the mask with EVF-SAM instead of using a hand-drawn layer
    # mask = input_image_editor['layers'][0]
    mask = evf_sam_mask(image, input_text)
    if not mask:
        gr.Info("Mask generation failed.")
        return None, None

    width, height = resize_image_dimensions(original_resolution_wh=image.size)
    resized_image = image.resize((width, height), Image.LANCZOS)
    resized_mask = mask.resize((width, height), Image.NEAREST)

    if randomize_seed_checkbox:
        seed_slicer = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed_slicer)

    result = pipe(
        prompt=inpaint_text,
        image=resized_image,
        mask_image=resized_mask,
        width=width,
        height=height,
        strength=strength_slider,
        generator=generator,
        num_inference_steps=num_inference_steps_slider
    ).images[0]
    print('INFERENCE DONE')
    return result, resized_mask


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image_editor_component = gr.ImageEditor(
                label='Image',
                type='pil',
                sources=["upload", "webcam"],
                image_mode='RGB',
                layers=False,
                brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"))

            with gr.Row():
                with gr.Column():
                    input_text_component = gr.Text(
                        label="Segment",
                        show_label=False,
                        max_lines=1,
                        placeholder="Segmentation text",
                        container=False,
                    )
                    inpaint_text_component = gr.Text(
                        label="Inpaint",
                        show_label=False,
                        max_lines=1,
                        placeholder="Inpaint text",
                        container=False,
                    )
                submit_button_component = gr.Button(
                    value='Submit', variant='primary', scale=0)

            with gr.Accordion("Advanced Settings", open=False):
                seed_slicer_component = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42,
                )
                randomize_seed_checkbox_component = gr.Checkbox(
                    label="Randomize seed", value=False)
                with gr.Row():
                    strength_slider_component = gr.Slider(
                        label="Strength",
                        minimum=0,
                        maximum=1,
                        step=0.01,
                        value=0.75,
                    )
                    num_inference_steps_slider_component = gr.Slider(
                        label="Number of inference steps",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=20,
                    )
        with gr.Column():
            output_image_component = gr.Image(
                type='pil', image_mode='RGB', label='Generated image')
            with gr.Accordion("Generated Mask", open=False):
                output_mask_component = gr.Image(
                    type='pil', image_mode='RGB', label='Input mask')

    submit_button_component.click(
        fn=process,
        inputs=[
            input_image_editor_component,
            input_text_component,
            inpaint_text_component,
            seed_slicer_component,
            randomize_seed_checkbox_component,
            strength_slider_component,
            num_inference_steps_slider_component
        ],
        outputs=[
            output_image_component,
            output_mask_component,
        ]
    )

demo.launch(debug=True)