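"""Gradio demo: open-vocabulary detection with Florence-2, mask refinement with
SAM, and text-guided inpainting of the selected objects with FLUX.1-schnell."""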
import gradio as gr
import numpy as np
import supervision as sv
import torch
from diffusers import FluxInpaintPipeline
from PIL import Image

from utils.florence import (
    FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
    load_florence_model,
    run_florence_inference,
)
from utils.sam import load_sam_image_model, run_sam_inference

# Load models
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
FLUX_PIPE = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
).to(DEVICE)
FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
SAM_MODEL = load_sam_image_model(device=DEVICE)
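# Supervision annotators for overlaying masks, bounding boxes, and labels
# (label text is placed at each mask's center of mass).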
COLORS = ['#FF1493', '#00BFFF', '#FF6347', '#FFD700', '#32CD32', '#8A2BE2']
COLOR_PALETTE = sv.ColorPalette.from_hex(COLORS)
BOX_ANNOTATOR = sv.BoxAnnotator(color=COLOR_PALETTE, color_lookup=sv.ColorLookup.INDEX)
LABEL_ANNOTATOR = sv.LabelAnnotator(
color=COLOR_PALETTE,
color_lookup=sv.ColorLookup.INDEX,
text_position=sv.Position.CENTER_OF_MASS,
text_color=sv.Color.from_hex("#000000"),
border_radius=5
)
MASK_ANNOTATOR = sv.MaskAnnotator(
color=COLOR_PALETTE,
color_lookup=sv.ColorLookup.INDEX
)
def visualize_detections(image, detections):
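    """Overlay masks, boxes, and labels on a copy of the input image."""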
output_image = image.copy()
output_image = MASK_ANNOTATOR.annotate(output_image, detections)
output_image = BOX_ANNOTATOR.annotate(output_image, detections)
output_image = LABEL_ANNOTATOR.annotate(output_image, detections)
return output_image
def detect_objects(image, text_prompt):
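    """Detect objects matching `text_prompt` with Florence-2 open-vocabulary
    detection, then refine the boxes into segmentation masks with SAM."""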
# Use Florence for object detection
_, result = run_florence_inference(
model=FLORENCE_MODEL,
processor=FLORENCE_PROCESSOR,
device=DEVICE,
image=image,
task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
text=text_prompt
)
detections = sv.Detections.from_lmm(
lmm=sv.LMM.FLORENCE_2,
result=result,
resolution_wh=image.size
)
# Use SAM to refine masks
detections = run_sam_inference(SAM_MODEL, image, detections)
return detections
def inpaint_selected_objects(image, detections, selected_indices, inpaint_prompt):
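    """Inpaint the union of the selected detection masks with FLUX,
    guided by `inpaint_prompt`."""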
    # Union of the selected boolean SAM masks. PIL's `size` is (width, height),
    # so reverse it to get the (height, width) shape numpy expects.
    mask = np.zeros(image.size[::-1], dtype=np.uint8)
    for idx in selected_indices:
        mask |= detections.mask[idx]
    mask_image = Image.fromarray(mask * 255)
    # Regenerate only the masked region; `strength` controls how strongly the
    # masked area is re-noised before denoising, i.e. how much it can change.
    result = FLUX_PIPE(
        prompt=inpaint_prompt,
        image=image,
        mask_image=mask_image,
        num_inference_steps=30,
        strength=0.85,
    ).images[0]
    return result
def process_image(input_image, detection_prompt, inpaint_prompt, selected_objects):
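    """Detect objects, visualize them, and (if indices were given) inpaint the
    selected ones. Returns (annotated image, inpainted image or None)."""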
detections = detect_objects(input_image, detection_prompt)
# Visualize detected objects
detected_image = visualize_detections(input_image, detections)
if selected_objects:
        selected_indices = [int(idx.strip()) for idx in selected_objects.split(',') if idx.strip()]
inpainted_image = inpaint_selected_objects(input_image, detections, selected_indices, inpaint_prompt)
return detected_image, inpainted_image
else:
return detected_image, None
# Gradio interface
with gr.Blocks() as demo:
gr.Markdown("# Object Detection and Inpainting with FLUX, Florence, and SAM")
with gr.Row():
with gr.Column():
input_image = gr.Image(type="pil", label="Input Image")
detection_prompt = gr.Textbox(label="Detection Prompt", placeholder="Enter objects to detect")
detect_button = gr.Button("Detect Objects")
with gr.Column():
detected_image = gr.Image(type="pil", label="Detected Objects")
            selected_objects = gr.Textbox(
                label="Selected Objects",
                placeholder="Enter indices of objects to inpaint (comma-separated)",
            )
inpaint_prompt = gr.Textbox(label="Inpainting Prompt", placeholder="Describe what to inpaint")
inpaint_button = gr.Button("Inpaint Selected Objects")
output_image = gr.Image(type="pil", label="Inpainted Result")
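    # Detection step: reuse process_image with empty inpainting inputs and keep
    # only the annotated image.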
detect_button.click(
fn=lambda img, prompt: process_image(img, prompt, "", "")[0],
inputs=[input_image, detection_prompt],
outputs=detected_image
)
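    # Inpainting step: re-runs detection, then inpaints the objects whose
    # comma-separated indices were entered above.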
inpaint_button.click(
fn=process_image,
inputs=[input_image, detection_prompt, inpaint_prompt, selected_objects],
outputs=[detected_image, output_image]
)
demo.launch(debug=False, show_error=True)