Spaces:

Norakneath
/

TestingYolo

Running

App Files Files Community

Norakneath committed on Feb 12

Commit

3f049b6

verified ·

1 Parent(s): 64f1d49

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -44

app.py CHANGED Viewed

@@ -2,64 +2,80 @@ import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
-# Load YOLO model (trained on 640x640)
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
def resize_and_pad(image, target_size=(640, 640)):
    """Letterbox an image onto a white canvas of ``target_size``.

    The image is shrunk to fit inside ``target_size`` while keeping its
    aspect ratio, then pasted centered on a white RGB canvas.

    Args:
        image: PIL image to fit. It is left unmodified.
        target_size: ``(width, height)`` of the padded output canvas.

    Returns:
        Tuple ``(padded_image, original_size, paste_x, paste_y)`` where
        ``original_size`` is the input's ``(width, height)`` and
        ``paste_x``/``paste_y`` are the offsets of the shrunk image inside
        the canvas.
    """
    original_size = image.size  # (width, height)

    # Image.thumbnail() resizes in place, so work on a copy; otherwise the
    # caller's image is silently shrunk as a side effect of this call.
    fitted = image.copy()
    # Use LANCZOS instead of ANTIALIAS (fix for Pillow v10+).
    fitted.thumbnail(target_size, Image.LANCZOS)

    # White background with the shrunk image pasted in the center.
    canvas = Image.new("RGB", target_size, (255, 255, 255))
    paste_x = (target_size[0] - fitted.size[0]) // 2
    paste_y = (target_size[1] - fitted.size[1]) // 2
    canvas.paste(fitted, (paste_x, paste_y))
    return canvas, original_size, paste_x, paste_y
def detect_lines(image):
    """Run YOLOv8 on a letterboxed copy of the image and draw detected lines.

    Args:
        image: Input image as a NumPy array (converted with
            ``Image.fromarray``).

    Returns:
        PIL image at the input's size with one blue rectangle and a
        ``"Line N"`` label per detected box.
    """
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image

    # Take the drawing canvas before letterboxing so it stays at the original
    # size even if resize_and_pad shrinks its argument in place.
    image_with_boxes = image.copy()

    # Resize & pad to the 640x640 input used for detection.
    resized_image, original_size, pad_x, pad_y = resize_and_pad(image)

    # Run YOLO detection
    results = model.predict(resized_image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box)) for box in results[0].boxes.xyxy.tolist()
    ]

    # resize_and_pad shrinks with a single aspect-preserving factor (and does
    # not enlarge), so the inverse scale is the same on both axes. Dividing
    # each axis by 640 separately ignores the padding and distorts the boxes
    # on non-square images.
    scale = max(original_size[0] / 640, original_size[1] / 640, 1.0)
    scaled_boxes = [
        [int((x1 - pad_x) * scale), int((y1 - pad_y) * scale),
         int((x2 - pad_x) * scale), int((y2 - pad_y) * scale)]
        for x1, y1, x2, y2 in detected_boxes
    ]

    # Draw bounding boxes on the original-size copy.
    draw = ImageDraw.Draw(image_with_boxes)
    for idx, (x1, y1, x2, y2) in enumerate(scaled_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Clamp so labels of boxes near the top edge stay on the canvas.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

    return image_with_boxes
-# Define Gradio interface with two-column layout
 with gr.Blocks() as iface:
-    gr.Markdown("# Text Line Detection with YOLOv8")
-    gr.Markdown("Upload an image and detect lines of text.")
     with gr.Row():
         with gr.Column(scale=1):
@@ -67,10 +83,18 @@ with gr.Blocks() as iface:
             image_input = gr.Image(type="numpy", label="Upload an image")
         with gr.Column(scale=1):
-            gr.Markdown("### Detected Text Lines")
-            image_output = gr.Image(type="pil", label="Detected lines")
-    image_input.upload(detect_lines, inputs=image_input, outputs=image_output)
 # Launch Gradio interface
 iface.launch()

 from ultralytics import YOLO
 from PIL import Image, ImageDraw
+# Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
def merge_boxes_into_lines(boxes, y_threshold=20):
    """Group bounding boxes into text lines by their top edge.

    Boxes are sorted by ``y1``. A box whose top edge is strictly less than
    ``y_threshold`` away from the top edge of the current line's first
    (topmost) box is absorbed into that line, widening it left/right and
    extending its bottom; any other box starts a new line.

    Args:
        boxes: Iterable of ``[x1, y1, x2, y2]`` boxes. ``None`` or an empty
            iterable yields ``[]``. The input is not modified.
        y_threshold: Exclusive upper bound on the top-edge distance (in box
            coordinate units) for a box to join the current line.

    Returns:
        List of merged ``[x1, y1, x2, y2]`` lists, ordered top to bottom.
    """
    if boxes is None:
        return []

    # sorted() accepts any iterable (lists, generators, arrays) and returns
    # a new list, so the caller's container is never reordered.
    ordered = sorted(boxes, key=lambda box: box[1])
    if not ordered:
        return []

    merged_lines = []
    current_line = list(ordered[0])  # copy so input boxes are not mutated

    for box in ordered[1:]:
        x1, y1, x2, y2 = box
        if abs(y1 - current_line[1]) < y_threshold:
            current_line[0] = min(current_line[0], x1)  # expand left
            current_line[2] = max(current_line[2], x2)  # expand right
            current_line[3] = max(current_line[3], y2)  # expand bottom
        else:
            merged_lines.append(current_line)
            current_line = list(box)

    merged_lines.append(current_line)
    return merged_lines
def detect_lines(image, resize=False, target_size=(640, 640)):
    """Detect boxes with YOLO, merge them into text lines, and draw them.

    Args:
        image: Input image as a NumPy array (converted with
            ``Image.fromarray``).
        resize: When true, the image is resized to ``target_size`` before
            detection and the boxes are drawn on that resized image.
        target_size: ``(width, height)`` used when ``resize`` is true.

    Returns:
        PIL image (original-size, or resized when ``resize`` is true) with
        one blue rectangle and a ``"Line N"`` label per merged line.
    """
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
    if resize:
        image = image.resize(target_size, Image.LANCZOS)

    # Run YOLO detection
    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box)) for box in results[0].boxes.xyxy.tolist()
    ]

    # Merge bounding boxes into full text lines
    merged_boxes = merge_boxes_into_lines(detected_boxes)

    # Draw on a copy of whichever image the detector saw, so box coordinates
    # and canvas always share the same size.
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)
    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Clamp so labels of boxes near the top edge stay on the canvas.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

    return image_with_boxes
+# Define Gradio interface with two options: Original & Resized detection
 with gr.Blocks() as iface:
+    gr.Markdown("# Text Line Detection with Merging (YOLOv8)")
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="numpy", label="Upload an image")
         with gr.Column(scale=1):
+            gr.Markdown("### YOLO on Original Image")
+            output_original = gr.Image(type="pil", label="Detected Lines (Original Size)")
+        with gr.Column(scale=1):
+            gr.Markdown("### YOLO on Resized Image (640x640)")
+            output_resized = gr.Image(type="pil", label="Detected Lines (Resized to 640x640)")
+    image_input.upload(
+        lambda img: (detect_lines(img, resize=False), detect_lines(img, resize=True, target_size=(640, 640))),
+        inputs=image_input,
+        outputs=[output_original, output_resized]
+    )
 # Launch Gradio interface
 iface.launch()