Norakneath committed on
Commit
3f049b6
·
verified ·
1 Parent(s): 64f1d49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -44
app.py CHANGED
@@ -2,64 +2,80 @@ import gradio as gr
2
  from ultralytics import YOLO
3
  from PIL import Image, ImageDraw
4
 
5
- # Load YOLO model (trained on 640x640)
6
  YOLO_MODEL_PATH = "best.pt"
7
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu") # Force CPU usage
8
 
9
def resize_and_pad(image, target_size=(640, 640)):
    """Fit an image inside ``target_size`` and centre it on a white canvas.

    The image is shrunk with its aspect ratio preserved and then pasted in
    the middle of a white RGB canvas of exactly ``target_size``.

    Args:
        image: PIL image to letterbox. It is left unmodified.
        target_size: ``(width, height)`` of the padded output canvas.

    Returns:
        Tuple ``(padded_image, original_size, paste_x, paste_y)`` where
        ``original_size`` is the input's ``(width, height)`` and
        ``paste_x`` / ``paste_y`` are the left/top offsets at which the
        resized image was pasted onto the canvas.
    """
    original_size = image.size  # (width, height)

    # Image.thumbnail() resizes in place, so operate on a copy; otherwise the
    # caller's image would be silently shrunk as a side effect.
    # LANCZOS replaces the ANTIALIAS alias that was removed in Pillow 10.
    resized = image.copy()
    resized.thumbnail(target_size, Image.LANCZOS)

    # White background canvas of the requested size.
    canvas = Image.new("RGB", target_size, (255, 255, 255))

    # Centre the resized image on the canvas.
    paste_x = (target_size[0] - resized.size[0]) // 2
    paste_y = (target_size[1] - resized.size[1]) // 2
    canvas.paste(resized, (paste_x, paste_y))

    return canvas, original_size, paste_x, paste_y
25
-
26
def detect_lines(image):
    """Run YOLOv8 detection on the input image and draw the detected lines.

    The image is letterboxed with ``resize_and_pad`` before inference, and
    the resulting boxes are mapped back to the original resolution for
    drawing.

    Args:
        image: Input image as a NumPy array (converted with
            ``Image.fromarray``).

    Returns:
        PIL image at the original resolution with every detected box
        outlined in blue and labelled ``Line <index>``.
    """
    image = Image.fromarray(image)  # Convert NumPy array to PIL Image

    # Hand resize_and_pad() a copy so `image` is guaranteed to stay at its
    # original size for drawing.
    resized_image, original_size, pad_x, pad_y = resize_and_pad(image.copy())

    # Run YOLO detection
    results = model.predict(resized_image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = results[0].boxes.xyxy.tolist()

    # The resized content occupies the canvas minus the padding on both
    # sides, not the full canvas, so the scale factors must be derived from
    # that content size (accurate to within one pixel of centring rounding).
    canvas_width, canvas_height = resized_image.size
    content_width = max(canvas_width - 2 * pad_x, 1)
    content_height = max(canvas_height - 2 * pad_y, 1)
    width_ratio = original_size[0] / content_width
    height_ratio = original_size[1] / content_height

    # Remove the padding offset, then scale back to original coordinates.
    scaled_boxes = [
        [int((x1 - pad_x) * width_ratio), int((y1 - pad_y) * height_ratio),
         int((x2 - pad_x) * width_ratio), int((y2 - pad_y) * height_ratio)]
        for x1, y1, x2, y2 in detected_boxes
    ]

    # Draw bounding boxes on a copy of the original image
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    for idx, (x1, y1, x2, y2) in enumerate(scaled_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")

    return image_with_boxes
58
 
59
- # Define Gradio interface with two-column layout
60
  with gr.Blocks() as iface:
61
- gr.Markdown("# Text Line Detection with YOLOv8")
62
- gr.Markdown("Upload an image and detect lines of text.")
63
 
64
  with gr.Row():
65
  with gr.Column(scale=1):
@@ -67,10 +83,18 @@ with gr.Blocks() as iface:
67
  image_input = gr.Image(type="numpy", label="Upload an image")
68
 
69
  with gr.Column(scale=1):
70
- gr.Markdown("### Detected Text Lines")
71
- image_output = gr.Image(type="pil", label="Detected lines")
72
-
73
- image_input.upload(detect_lines, inputs=image_input, outputs=image_output)
 
 
 
 
 
 
 
 
74
 
75
  # Launch Gradio interface
76
  iface.launch()
 
2
  from ultralytics import YOLO
3
  from PIL import Image, ImageDraw
4
 
5
+ # Load YOLO model
6
  YOLO_MODEL_PATH = "best.pt"
7
  model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu") # Force CPU usage
8
 
9
def merge_boxes_into_lines(boxes, y_threshold=20):
    """Merge bounding boxes whose tops are close on the y-axis into lines.

    Boxes are processed in order of their top edge. A box joins the current
    line when its top is less than ``y_threshold`` below the top of the box
    that started the line; otherwise it starts a new line.

    Args:
        boxes: Iterable of bounding boxes ``[x1, y1, x2, y2]``. The input
            and its boxes are not modified.
        y_threshold: Maximum distance between a box's top and the line's top
            for the box to be considered part of the same line.

    Returns:
        List of merged line boxes ``[x1, y1, x2, y2]``, ordered top to
        bottom. Empty when ``boxes`` is empty.
    """
    # sorted() always yields a new list, so any iterable is accepted and the
    # caller's sequence is left untouched.
    ordered = sorted(boxes, key=lambda b: b[1])
    if not ordered:
        return []

    merged_lines = []
    current_line = list(ordered[0])

    for x1, y1, x2, y2 in ordered[1:]:
        # Boxes are sorted by y1 and current_line[1] is the smallest y1 of
        # the line, so the difference is never negative.
        if y1 - current_line[1] < y_threshold:
            current_line[0] = min(current_line[0], x1)  # Expand left boundary
            current_line[2] = max(current_line[2], x2)  # Expand right boundary
            current_line[3] = max(current_line[3], y2)  # Expand bottom boundary
        else:
            merged_lines.append(current_line)
            current_line = [x1, y1, x2, y2]

    merged_lines.append(current_line)
    return merged_lines
41
+
42
def detect_lines(image, resize=False, target_size=(640, 640)):
    """Detect text boxes with YOLO, merge them into lines, and draw them.

    Args:
        image: Input image as a NumPy array (it is converted with
            ``Image.fromarray``), or ``None`` when no image is available.
        resize: Whether to resize the image to ``target_size`` before
            detection. The resize does not preserve the aspect ratio.
        target_size: Tuple ``(width, height)`` used when ``resize`` is true.

    Returns:
        PIL image with the merged line boxes outlined in blue and labelled
        ``Line <index>``. The boxes are drawn on the resized image when
        ``resize`` is true, otherwise on a copy of the input at its original
        size. Returns ``None`` when ``image`` is ``None``.
    """
    # Nothing to process (e.g. the input was cleared): fail soft instead of
    # raising inside Image.fromarray.
    if image is None:
        return None

    image = Image.fromarray(image)  # Convert NumPy array to PIL Image

    if resize:
        image = image.resize(target_size, Image.LANCZOS)

    # Run YOLO detection
    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box))  # Convert to integer
        for box in results[0].boxes.xyxy.tolist()
    ]

    # Merge bounding boxes into full text lines
    merged_boxes = merge_boxes_into_lines(detected_boxes)

    # Draw on a copy of the image that was actually fed to the model, so the
    # box coordinates and the canvas always share the same coordinate system.
    image_with_boxes = image.copy()
    draw = ImageDraw.Draw(image_with_boxes)

    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Clamp so labels of boxes near the top edge stay on the canvas.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

    return image_with_boxes
75
 
76
+ # Define Gradio interface with two options: Original & Resized detection
77
  with gr.Blocks() as iface:
78
+ gr.Markdown("# Text Line Detection with Merging (YOLOv8)")
 
79
 
80
  with gr.Row():
81
  with gr.Column(scale=1):
 
83
  image_input = gr.Image(type="numpy", label="Upload an image")
84
 
85
  with gr.Column(scale=1):
86
+ gr.Markdown("### YOLO on Original Image")
87
+ output_original = gr.Image(type="pil", label="Detected Lines (Original Size)")
88
+
89
+ with gr.Column(scale=1):
90
+ gr.Markdown("### YOLO on Resized Image (640x640)")
91
+ output_resized = gr.Image(type="pil", label="Detected Lines (Resized to 640x640)")
92
+
93
+ image_input.upload(
94
+ lambda img: (detect_lines(img, resize=False), detect_lines(img, resize=True, target_size=(640, 640))),
95
+ inputs=image_input,
96
+ outputs=[output_original, output_resized]
97
+ )
98
 
99
  # Launch Gradio interface
100
  iface.launch()