Spaces:

AmitGazal
/

holiday_cards

Sleeping

App Files Files Community

Amit Gazal committed on Dec 11, 2024

Commit

eb5c95c

1 Parent(s): 48da469

add text rectangle

Browse files

Files changed (2) hide show

app.py +121 -17
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from PIL import Image
 import matplotlib.pyplot as plt
 import torch
 from torchvision import transforms
@@ -11,10 +11,14 @@ import io
 import requests
 import numpy as np
 from scipy import ndimage
 IDEOGRAM_API_KEY = os.getenv('IDEOGRAM_API_KEY')
 IDEOGRAM_URL = "https://api.ideogram.ai/edit"
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 # Constants should be in UPPERCASE
 GPT_MODEL_NAME = "gpt-4o"
@@ -27,20 +31,19 @@ if torch.cuda.is_available():
 model.eval()
 GPT_PROMPT = '''
-I work with a tool that knows how to edit backgrounds.
-I want your help with prompt.
-I want to adjust their background to be in a christmas vibes.
-For example, if you see a tree there, cover it in snow,
-add christmas lights to some of the stuff in the background, maybe add a few elements like christmas tree, but take into considration the perspective and the logic of the image.
 '''
-def image_to_prompt(image: str) -> tuple[str, str]:
     base64_image = encode_image(image)
     messages = [{
         "role": "user",
         "content": [
-            {"type": "text", "text": GPT_PROMPT},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
         ]
     }]
@@ -151,17 +154,115 @@ def dilate_mask(mask: Image.Image) -> Image.Image:
     # Convert back to PIL Image
     return Image.fromarray(dilated_mask.astype(np.uint8))
 def run_flow(input_image, holiday, message):
-    prompt = image_to_prompt(input_image)
     print(prompt)
     result_image, only_background_image, mask = remove_background(input_image)
     dilated_mask = dilate_mask(mask)
-    modified_image = modify_background(input_image, dilated_mask, prompt)
-    first_output_image = mask
-    second_output_image = dilated_mask
-    third_output_image = modified_image
-    return first_output_image, second_output_image, third_output_image
 # Replace the demo interface
 demo = gr.Interface(
@@ -172,9 +273,12 @@ demo = gr.Interface(
         gr.Text(label="Optional Message", placeholder="Enter your holiday message here...")
     ],
     outputs=[
-        gr.Image(type="pil", label="First Output"),
-        gr.Image(type="pil", label="Second Output"),
-        gr.Image(type="pil", label="Third Output")
     ],
     title="Holiday Card Generator",
     description="Upload an image to generate a holiday card"

 import gradio as gr
+from PIL import Image, ImageDraw
 import matplotlib.pyplot as plt
 import torch
 from torchvision import transforms
 import requests
 import numpy as np
 from scipy import ndimage
+from insightface.app import FaceAnalysis
 IDEOGRAM_API_KEY = os.getenv('IDEOGRAM_API_KEY')
 IDEOGRAM_URL = "https://api.ideogram.ai/edit"
+face_detection_app = FaceAnalysis(allowed_modules=['detection']) # enable detection model only
+face_detection_app.prepare(ctx_id=0, det_size=(640, 640))
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 # Constants should be in UPPERCASE
 GPT_MODEL_NAME = "gpt-4o"
 model.eval()
 GPT_PROMPT = '''
+You are a background editor.
+Your job is to adjust the background of the image to be in a {{holiday}} vibes, but take into considration the perspective and the logic of the image.
+Your output should be a prompt that can be used to edit the background of the image.
+The background should be edited in a way that is consistent with the image.
 '''
+def image_to_prompt(image: str, holiday: str) -> tuple[str, str]:
     base64_image = encode_image(image)
     messages = [{
         "role": "user",
         "content": [
+            {"type": "text", "text": GPT_PROMPT.replace("{{holiday}}", holiday)},
             {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
         ]
     }]
     # Convert back to PIL Image
     return Image.fromarray(dilated_mask.astype(np.uint8))
def detect_faces(image: Image.Image) -> list[dict]:
    """Run the module-level face detector on a PIL image.

    Args:
        image: Input picture as a PIL image, in any mode.

    Returns:
        Whatever ``face_detection_app.get`` returns for the picture.
    """
    # Force three RGB channels first: RGBA, palette or grayscale uploads would
    # otherwise reach the detector with an unexpected channel count.
    rgb_pixels = np.array(image.convert("RGB"))
    # InsightFace follows the OpenCV convention (BGR channel order), so the
    # channel axis is reversed. The reversed view has negative strides, hence
    # the explicit contiguous copy.
    bgr_pixels = np.ascontiguousarray(rgb_pixels[:, :, ::-1])
    return face_detection_app.get(bgr_pixels)
def check_text_position(x, y, text_rect_width, text_rect_height, face_rects, image_width, image_height):
    """Tell whether a text box centred on (x, y) fits the image and avoids all faces.

    Args:
        x: Horizontal centre of the candidate box.
        y: Vertical centre of the candidate box.
        text_rect_width: Width of the candidate box.
        text_rect_height: Height of the candidate box.
        face_rects: Iterable of ``(x1, y1, x2, y2)`` rectangles to keep clear.
        image_width: Width of the image the box must stay inside.
        image_height: Height of the image the box must stay inside.

    Returns:
        True when the box lies within ``[0, image_width] x [0, image_height]``
        and neither overlaps nor touches any rectangle in ``face_rects``;
        False otherwise.
    """
    # Half-extents use floor division, so an odd integer size yields a box
    # one unit smaller than requested.
    half_width = text_rect_width // 2
    half_height = text_rect_height // 2
    left, right = x - half_width, x + half_width
    top, bottom = y - half_height, y + half_height

    # Reject boxes that stick out of the image on any side.
    if left < 0 or right > image_width or top < 0 or bottom > image_height:
        return False

    # Two axis-aligned rectangles collide when they overlap on both axes;
    # the non-strict comparisons mean that merely touching counts as a hit.
    return not any(
        right >= fx1 and left <= fx2 and bottom >= fy1 and top <= fy2
        for fx1, fy1, fx2, fy2 in face_rects
    )
def find_place_to_add_text(image: Image.Image, faces: list[dict]) -> tuple[float, float, float, float]:
    """Pick a rectangle for a caption that stays clear of the detected faces.

    Four anchor points are tried in order (bottom centre, top centre, left
    middle, right middle). The box starts at 80% x 30% of the image and is
    shrunk by 10% per round until one anchor is accepted by
    ``check_text_position`` or the box drops below 10% x 3%.

    Args:
        image: The picture; only its ``size`` is read.
        faces: Detections; each item must expose a ``bbox`` attribute that
            unpacks to ``x1, y1, x2, y2`` (presumably pixel coordinates in
            ``image`` - confirm against the detector).

    Returns:
        ``(left, top, width, height)`` of the chosen box, each expressed as a
        fraction of the image width or height. When no candidate fits, a
        message is printed and a minimum-size box near the bottom centre is
        returned instead.
    """
    image_width, image_height = image.size

    # Grow every face box by a fixed margin, clamped to the image, so the
    # caption keeps some distance from the faces.
    face_margin = 20
    face_rects = []
    for face in faces:
        x1, y1, x2, y2 = map(int, face.bbox)
        face_rects.append((
            max(0, x1 - face_margin),
            max(0, y1 - face_margin),
            min(image_width, x2 + face_margin),
            min(image_height, y2 + face_margin),
        ))

    # Candidate box centres, listed in order of preference.
    inset_x = int(0.1 * image_width)
    inset_y = int(0.1 * image_height)
    positions = [
        (image_width // 2, int(0.85 * image_height) - inset_y),  # Bottom center
        (image_width // 2, int(0.15 * image_height) + inset_y),  # Top center
        (int(0.15 * image_width) + inset_x, image_height // 2),  # Left middle
        (int(0.85 * image_width) - inset_x, image_height // 2),  # Right middle
    ]

    # Box size as a fraction of the image: start large, shrink until it fits.
    width_fraction = 0.8
    height_fraction = 0.3
    min_text_width = 0.1
    min_text_height = 0.03
    reduction_factor = 0.9  # keep 90% of the size on every retry

    while width_fraction >= min_text_width and height_fraction >= min_text_height:
        text_rect_width = width_fraction * image_width
        text_rect_height = height_fraction * image_height

        for x, y in positions:
            if check_text_position(x, y, text_rect_width, text_rect_height,
                                   face_rects, image_width, image_height):
                # Convert the centre-based box to a top-left corner, then to
                # fractions of the image size.
                return (
                    (x - text_rect_width // 2) / image_width,
                    (y - text_rect_height // 2) / image_height,
                    width_fraction,
                    height_fraction,
                )

        width_fraction *= reduction_factor
        height_fraction *= reduction_factor

    # Nothing fitted: fall back to a minimum-size box near the bottom centre.
    print("Failed to find a suitable position")
    return (
        (image_width // 2 - (min_text_width * image_width) // 2) / image_width,
        (int(0.85 * image_height) - (min_text_height * image_height) // 2) / image_height,
        min_text_width,
        min_text_height,
    )
 def run_flow(input_image, holiday, message):
     """Build the holiday card and mark where a caption could be placed.

     Faces are detected on the input, a background-edit prompt is requested
     for ``holiday``, the background is edited through a dilated mask, and a
     red outline is drawn on a copy of the result at the position chosen by
     ``find_place_to_add_text``. ``message`` is accepted but not used here.

     Returns:
         A 6-tuple: the edited image, the copy carrying the red outline, and
         the outline's left, top, width and height as fractions of the image.
     """
     detected_faces = detect_faces(input_image)

     prompt = image_to_prompt(input_image, holiday)
     print(prompt)

     # Only the mask is needed from the background-removal step.
     _, _, mask = remove_background(input_image)
     output_image = modify_background(input_image, dilate_mask(mask), prompt)

     # Draw on a copy so the unmarked card can be returned as well.
     preview = output_image.copy()

     placement = find_place_to_add_text(input_image, detected_faces)
     x_fraction, y_fraction, width_fraction, height_fraction = placement

     # Scale the fractional box to pixel units of the edited image.
     left = x_fraction * output_image.width
     top = y_fraction * output_image.height
     box_width = width_fraction * output_image.width
     box_height = height_fraction * output_image.height
     print(left, top, box_width, box_height)

     ImageDraw.Draw(preview).rectangle(
         (left, top, left + box_width, top + box_height),
         outline="red",
     )

     return output_image, preview, x_fraction, y_fraction, width_fraction, height_fraction
 # Replace the demo interface
 demo = gr.Interface(
         gr.Text(label="Optional Message", placeholder="Enter your holiday message here...")
     ],
     outputs=[
+        gr.Image(type="pil", label="Output Image"),
+        gr.Image(type="pil", label="Output Image With Text Rectangle"),
+        gr.Number(label="Text Top Left X"),
+        gr.Number(label="Text Top Left Y"),
+        gr.Number(label="Text Width"),
+        gr.Number(label="Text Height")
     ],
     title="Holiday Card Generator",
     description="Upload an image to generate a holiday card"

requirements.txt CHANGED Viewed

@@ -9,4 +9,6 @@ matplotlib
 openai
 requests
 scipy
-numpy

 openai
 requests
 scipy
+numpy
+insightface
+onnxruntime