Norakneath committed
Commit 97f8843 · verified · 1 Parent(s): 818d306

Update app.py

Files changed (1)
  1. app.py +20 -38
app.py CHANGED
@@ -2,38 +2,25 @@ import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw
 
-# Load YOLO model
 YOLO_MODEL_PATH = "best.pt"
-model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")  # Force CPU usage
+model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
 
 def merge_boxes_into_lines(boxes, y_threshold=10):
-    """
-    Merge bounding boxes that are on the same row but not merge different row lines.
-    Args:
-        boxes: List of bounding boxes [x1, y1, x2, y2]
-        y_threshold: Max difference in y1 position to be considered the same row
-    Returns:
-        List of merged line bounding boxes
-    """
     if len(boxes) == 0:
         return []
 
-    # Sort boxes by y1 (top position)
     boxes = sorted(boxes, key=lambda b: b[1])
-
     merged_lines = []
     current_line = list(boxes[0])
 
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
 
-        # Merge only if y position is very close (same row)
         if abs(y1 - current_line[1]) < y_threshold:
-            current_line[0] = min(current_line[0], x1)  # Expand left boundary
-            current_line[2] = max(current_line[2], x2)  # Expand right boundary
-            current_line[3] = max(current_line[3], y2)  # Expand bottom boundary
+            current_line[0] = min(current_line[0], x1)
+            current_line[2] = max(current_line[2], x2)
+            current_line[3] = max(current_line[3], y2)
         else:
-            # Store previous line and start a new one
             merged_lines.append(current_line)
             current_line = list(boxes[i])
 
@@ -41,25 +28,15 @@ def merge_boxes_into_lines(boxes, y_threshold=10):
     return merged_lines
 
 def detect_and_crop_lines(image):
-    """
-    Detects text lines using YOLO, merges them, and crops each line.
-    Args:
-        image: Input image (PIL format)
-    Returns:
-        Annotated image with bounding boxes, List of cropped images
-    """
-    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-    original_image = image.copy()  # Keep a copy of the original image
-
-    # Run YOLO detection on the original image
+    image = Image.fromarray(image)
+    original_image = image.copy()
+
     results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
     detected_boxes = results[0].boxes.xyxy.tolist()
-    detected_boxes = [list(map(int, box)) for box in detected_boxes]  # Convert to integer
+    detected_boxes = [list(map(int, box)) for box in detected_boxes]
 
-    # Merge bounding boxes based on row position
     merged_boxes = merge_boxes_into_lines(detected_boxes)
 
-    # Draw bounding boxes
     draw = ImageDraw.Draw(original_image)
     cropped_lines = []
 
@@ -67,13 +44,11 @@ def detect_and_crop_lines(image):
         draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
         draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")
 
-        # Crop the detected text line
         cropped_line = image.crop((x1, y1, x2, y2))
         cropped_lines.append(cropped_line)
 
     return original_image, cropped_lines
 
-# Define Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# Text Line Detection")
     gr.Markdown("## Input your custom image for text line detection")
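Since detect_and_crop_lines begins with Image.fromarray, it expects a NumPy array (so the image_input component defined in the elided lines presumably uses gr.Image's default numpy type) and returns a PIL image plus a list of PIL crops. A minimal sketch for exercising it outside Gradio, assuming it runs alongside the definitions above so best.pt has already been loaded; sample.png and the output filenames are hypothetical:

import numpy as np
from PIL import Image

# Illustrative only: feed a local file to the detector instead of a Gradio upload.
array = np.array(Image.open("sample.png").convert("RGB"))
annotated, crops = detect_and_crop_lines(array)
annotated.save("annotated.png")
for i, crop in enumerate(crops):
    crop.save(f"line_{i}.png")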
@@ -87,15 +62,22 @@ with gr.Blocks() as iface:
     gr.Markdown("### Annotated Image with Detected Lines")
     output_annotated = gr.Image(type="pil", label="Detected Text Lines")
 
-    gr.Markdown("### Cropped Text Lines (Each Line Detected Separately)")
+    gr.Markdown("### Cropped Text Lines (Displayed Row by Row)")
+
+    cropped_output_rows = []
+    for i in range(20):
+        with gr.Row():
+            cropped_output_rows.append(gr.Image(type="pil", label=f"Line {i+1}"))
 
-    cropped_gallery = gr.Gallery(label="Cropped Lines Gallery", columns=3, preview=True)
+    def process_and_display(image):
+        annotated_img, cropped_imgs = detect_and_crop_lines(image)
+        cropped_imgs += [None] * (20 - len(cropped_imgs))
+        return [annotated_img] + cropped_imgs[:20]
 
     image_input.upload(
-        lambda img: detect_and_crop_lines(img),
+        process_and_display,
         inputs=image_input,
-        outputs=[output_annotated, cropped_gallery]
+        outputs=[output_annotated] + cropped_output_rows
     )
 
-# Launch Gradio interface
 iface.launch()
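On the UI side, the commit swaps the single gr.Gallery for 20 pre-created gr.Image slots, so the upload handler must return exactly one value per wired output: the annotated image plus 20 crops, padded with None when fewer lines are found and truncated when more are found. A sketch of the same padding pattern with the slot count pulled into a constant; MAX_LINES and pad_to_fixed_outputs are hypothetical names, not part of the commit:

MAX_LINES = 20  # must match the number of gr.Image slots created in the loop above

def pad_to_fixed_outputs(annotated_img, cropped_imgs, max_lines=MAX_LINES):
    # Fill unused slots with None (rendered empty) and drop any extra crops.
    padded = list(cropped_imgs) + [None] * max_lines
    return [annotated_img] + padded[:max_lines]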
 