File size: 2,833 Bytes
4e3e212
2cb939c
 
b32ce4a
2cb939c
97f8843
b32ce4a
818d306
3f049b6
 
 
 
 
 
 
 
 
 
 
97f8843
 
 
3f049b6
 
 
 
 
 
 
818d306
97f8843
 
 
3f049b6
2cb939c
97f8843
a598ef7
3f049b6
b32ce4a
818d306
 
a598ef7
3f049b6
a598ef7
 
b32ce4a
818d306
426f612
 
 
 
 
 
 
818d306
 
 
b32ce4a
4e3e212
50437ea
 
b32ce4a
64f1d49
 
 
 
 
 
818d306
 
3f049b6
426f612
 
 
818d306
97f8843
 
426f612
3f049b6
 
97f8843
3f049b6
426f612
3f049b6
b32ce4a
4e3e212
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from ultralytics import YOLO
from PIL import Image, ImageDraw

# Weights file for the detector, given as a relative path.
YOLO_MODEL_PATH = "best.pt"
# Loaded once at import time as a detection model and moved to the CPU;
# detect_and_crop_lines() reuses this single instance for every request.
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")

def merge_boxes_into_lines(boxes, y_threshold=10):
    """Group ``(x1, y1, x2, y2)`` boxes into line-level bounding boxes.

    Boxes are visited in order of increasing ``y1``. A box joins the line
    currently being built when its ``y1`` is strictly less than
    ``y_threshold`` away from that line's ``y1`` (the ``y1`` of the box
    that opened the line); otherwise it opens a new line. Joining a line
    widens it horizontally and extends its bottom edge as needed.

    Args:
        boxes: Sequence of 4-item boxes ``(x1, y1, x2, y2)``.
        y_threshold: Maximum (exclusive) difference in ``y1`` for a box to
            be treated as part of the current line.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per line, ordered
        top-to-bottom. The input boxes are not modified.
    """
    if len(boxes) == 0:
        return []

    ordered = sorted(boxes, key=lambda box: box[1])

    # The last entry of ``lines`` is always the line still being grown.
    lines = [list(ordered[0])]

    for box in ordered[1:]:
        left, top, right, bottom = box
        active = lines[-1]

        if abs(top - active[1]) < y_threshold:
            # Same line: stretch the running box to cover this one.
            active[0] = min(active[0], left)
            active[2] = max(active[2], right)
            active[3] = max(active[3], bottom)
        else:
            # Too far below the line's top edge: start a new line.
            lines.append(list(box))

    return lines

def detect_and_crop_lines(image):
    """Detect text lines in an image, annotate them, and crop each one.

    Runs the module-level YOLO ``model`` on the image, merges the detected
    boxes into line-level boxes with ``merge_boxes_into_lines``, outlines
    each line on a copy of the image, and builds a fixed-width preview of
    every line.

    Args:
        image: Array accepted by ``PIL.Image.fromarray`` (the Gradio input
            component in this file is configured with ``type="numpy"``).

    Returns:
        An ``(annotated_image, cropped_lines)`` tuple: a PIL image with a
        blue outline and ``Line <n>`` label per line, and a list of PIL
        images holding one resized crop per line, in label order.
    """
    # Preview width in pixels; the height follows the crop's aspect ratio.
    fixed_width = 200

    image = Image.fromarray(image)
    original_image = image.copy()

    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        [int(value) for value in box]
        for box in results[0].boxes.xyxy.tolist()
    ]

    # Truncating coordinates to int can collapse a tiny detection to zero
    # width or height. Such a box has no pixels to crop and would divide by
    # zero below, so drop it before numbering to keep the labels and the
    # gallery entries aligned.
    line_boxes = [
        box
        for box in merge_boxes_into_lines(detected_boxes)
        if box[2] > box[0] and box[3] > box[1]
    ]

    draw = ImageDraw.Draw(original_image)
    cropped_lines = []

    for idx, (x1, y1, x2, y2) in enumerate(line_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        draw.text((x1, y1 - 10), f"Line {idx}", fill="blue")

        # Crop from the untouched image so the outline is not in the preview.
        cropped_line = image.crop((x1, y1, x2, y2))

        # Resize to the fixed width while maintaining aspect ratio. A very
        # wide, thin line would otherwise round down to a height of 0,
        # which resize() rejects, so keep at least one pixel row.
        aspect_ratio = cropped_line.height / cropped_line.width
        new_height = max(1, int(fixed_width * aspect_ratio))
        cropped_line = cropped_line.resize((fixed_width, new_height), Image.LANCZOS)

        cropped_lines.append(cropped_line)

    return original_image, cropped_lines

# Build the Gradio UI. Components appear in the order they are created
# inside the nested Blocks/Row/Column contexts, so statement order here
# defines the page layout.
with gr.Blocks() as iface:
    gr.Markdown("# Text Line Detection")
    gr.Markdown("## Input your custom image for text line detection")

    # Top row: upload widget on the left, annotated result on the right.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload Image")
            # type="numpy": the handler receives the upload as an array,
            # which detect_and_crop_lines() converts with Image.fromarray.
            image_input = gr.Image(type="numpy", label="Upload an image")

        with gr.Column(scale=1):
            gr.Markdown("### Annotated Image with Detected Lines")
            output_annotated = gr.Image(type="pil", label="Detected Text Lines")

    gr.Markdown("### Cropped Text Lines (Small Fixed-Size Previews)")

    # One gallery entry per cropped line, shown in a single column.
    cropped_output = gr.Gallery(label="Detected Text Lines", columns=1, preview=True)

    def process_and_display(image):
        """Run line detection on the uploaded image.

        Returns the annotated image and the list of cropped line previews,
        in the order expected by the ``outputs`` wired up below.
        """
        annotated_img, cropped_imgs = detect_and_crop_lines(image)
        return annotated_img, cropped_imgs  

    # Register the handler on the image component's upload event; its two
    # return values fill the annotated image and the gallery respectively.
    image_input.upload(
        process_and_display,
        inputs=image_input,
        outputs=[output_annotated, cropped_output]
    )

# Starts the app as a module-level side effect (runs on import as well).
iface.launch()