Spaces:

Norakneath
/

TestingYolo

Running

App Files Files Community

Norakneath committed on Feb 12

Commit

a598ef7

verified ·

1 Parent(s): 8730661

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -16

app.py CHANGED Viewed

@@ -3,39 +3,77 @@ import torch
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
-# Load YOLO model from the relative path "best.pt" (presumably resolved against the working directory -- TODO confirm)
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
-def detect_text(image):
-    """
-    Runs YOLOv8 detection on the input image and returns a copy with the detected boxes drawn on it.
-    Args:
-        image: NumPy array (converted to a PIL Image before detection)
-    Returns:
-        PIL Image copy with every detected box outlined in red and labelled "Text"
-    """
-    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-    # Run YOLO detection on CPU with conf=0.3 and iou=0.4
-    results = model.predict(image, conf=0.3, iou=0.4, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()  # Boxes of the first result as [x1, y1, x2, y2] lists
     # Draw bounding boxes on a copy so the converted input image is left untouched
     image_with_boxes = image.copy()
     draw = ImageDraw.Draw(image_with_boxes)
-    for box in detected_boxes:
-        x1, y1, x2, y2 = map(int, box)
-        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)  # Draw bounding box
-        draw.text((x1, y1 - 10), "Text", fill="red")  # Label each box, 10 px above its top-left corner
     return image_with_boxes
 # Define Gradio interface
 with gr.Blocks() as iface:
-    gr.Markdown("# Text Detection with YOLOv8")
-    gr.Markdown("## Upload an image and detect text regions using YOLO")
     with gr.Tab("Upload Image"):
-        gr.Markdown("Upload an image, and the YOLO model will detect text in the image.")
         image_input = gr.Image(type="numpy", label="Upload an image")
-        image_output = gr.Image(type="pil", label="Detected text")
-        image_input.upload(detect_text, inputs=image_input, outputs=image_output)  # Upload event passes the image to detect_text and shows its return value
 # Launch Gradio interface
 iface.launch()

 from ultralytics import YOLO
 from PIL import Image, ImageDraw
+# Load YOLO model for line detection from the relative path "best.pt" (presumably resolved against the working directory -- TODO confirm)
 YOLO_MODEL_PATH = "best.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
+def merge_boxes_into_lines(boxes, y_threshold=20):
+    """
+    Group bounding boxes into line-level boxes by the position of their top edge (y1).
+    Boxes are processed in ascending y1 order. A box joins the current line when its
+    y1 differs by less than y_threshold from the y1 of the box that started the line;
+    otherwise it starts a new line.
+    Args:
+        boxes: List of bounding boxes [x1, y1, x2, y2]
+        y_threshold: Exclusive upper bound on the y1 difference to the line's first box
+    Returns:
+        List of merged line bounding boxes [x1, y1, x2, y2], ordered by ascending y1.
+        NOTE(review): no step here removes overlap between the returned boxes; a line's
+        bottom edge can extend below the top of the next line.
+    """
+    if len(boxes) == 0:
+        return []
+    # Sort boxes by y1 (top position)
+    boxes = sorted(boxes, key=lambda b: b[1])
+    merged_lines = []
+    current_line = list(boxes[0])  # Copy so the caller's first box is not mutated
+    for i in range(1, len(boxes)):
+        x1, y1, x2, y2 = boxes[i]
+        # Merge boxes whose top edge is close to the top edge of the line's first box
+        # (current_line[1] is never updated, so it stays the smallest y1 of the line)
+        if abs(y1 - current_line[1]) < y_threshold:
+            current_line[0] = min(current_line[0], x1)  # Expand left boundary
+            current_line[2] = max(current_line[2], x2)  # Expand right boundary
+            current_line[3] = max(current_line[3], y2)  # Expand bottom boundary
+        else:
+            # Close the finished line and start a new one from this box
+            merged_lines.append(current_line)
+            current_line = list(boxes[i])
+    merged_lines.append(current_line)  # Flush the last line still being built
+    return merged_lines
+def detect_lines(image):
+    """
+    Runs YOLOv8 detection on the input image and draws the merged line-level boxes.
+    Args:
+        image: NumPy array (converted to a PIL Image before detection)
+    Returns:
+        PIL Image copy with every merged box outlined in blue and labelled "Line <idx>" (idx starts at 0)
+    """
+    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+    # Run YOLO detection with confidence threshold 0.2
+    results = model.predict(image, conf=0.2, iou=0.4, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()  # Boxes of the first result as [x1, y1, x2, y2] lists
+    detected_boxes = [list(map(int, box)) for box in detected_boxes]  # Convert to integer (int() truncates)
+    # Merge detected bounding boxes into line-level boxes
+    merged_boxes = merge_boxes_into_lines(detected_boxes)
     # Draw bounding boxes on a copy so the converted input image is left untouched
     image_with_boxes = image.copy()
     draw = ImageDraw.Draw(image_with_boxes)
+    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
+        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
+        draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")  # Label sits 10 px above the box's top-left corner
     return image_with_boxes
 # Define Gradio interface
 with gr.Blocks() as iface:
+    gr.Markdown("# Line Detection with YOLOv8")
+    gr.Markdown("## Upload an image to detect text lines using YOLOv8")
     with gr.Tab("Upload Image"):
+        gr.Markdown("Upload an image, and the YOLO model will detect lines of text.")
         image_input = gr.Image(type="numpy", label="Upload an image")
+        image_output = gr.Image(type="pil", label="Detected lines")
+        image_input.upload(detect_lines, inputs=image_input, outputs=image_output)  # Upload event passes the image to detect_lines and shows its return value
 # Launch Gradio interface
 iface.launch()