File size: 3,871 Bytes
6d3c4df
 
 
 
fe50f9f
6d3c4df
0827df2
 
 
6d3c4df
fe50f9f
 
 
0827df2
fe50f9f
 
 
 
 
 
0827df2
 
 
 
6d3c4df
fe50f9f
6d3c4df
 
 
0827df2
6d3c4df
 
 
 
 
 
0827df2
 
 
 
6d3c4df
 
 
 
 
 
 
 
fe50f9f
6d3c4df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe50f9f
 
b28282d
fe50f9f
 
 
 
 
 
6d3c4df
 
 
0827df2
6d3c4df
0827df2
 
6d3c4df
 
 
0827df2
6d3c4df
 
 
0827df2
6d3c4df
 
0827df2
6d3c4df
 
 
 
 
 
 
 
0827df2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
from ultralytics import YOLO
from PIL import Image, ImageDraw
import pytesseract
import subprocess

# Location of the Tesseract executable. pytesseract is pointed at this path,
# and check_tesseract() invokes the same binary directly to probe for it.
TESSERACT_PATH = "/usr/bin/tesseract"
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH

def check_tesseract():
    """Report whether the Tesseract binary at ``TESSERACT_PATH`` can be run.

    Executes ``<TESSERACT_PATH> --version`` and prints the first line of
    whatever the command writes.

    Returns:
        bool: ``True`` when the command launched and exited with status 0,
        ``False`` when it could not be launched or exited with an error
        (the reason is printed).
    """
    try:
        # Fold stderr into the captured output so the version banner is
        # picked up whichever stream the installed build writes it to.
        raw_output = subprocess.check_output(
            [TESSERACT_PATH, "--version"],
            stderr=subprocess.STDOUT,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError: binary missing / not executable.
        # SubprocessError: the command ran but returned a non-zero status.
        print(f"Tesseract not found: {e}")
        return False

    # errors="replace" keeps an unexpected byte sequence in the banner from
    # turning a working installation into a reported failure.
    banner_lines = raw_output.decode("utf-8", errors="replace").splitlines()
    tesseract_version = banner_lines[0] if banner_lines else ""
    print(f"Tesseract Version: {tesseract_version}")
    return True

# Load the YOLO detection weights once at import time
# (best.pt must exist in the working directory).
YOLO_MODEL_PATH = "best.pt"
model = YOLO(YOLO_MODEL_PATH, task="detect")
# Move to CPU in a separate statement so `model` stays bound to the YOLO
# object no matter what `.to()` returns.
# NOTE(review): some older ultralytics releases reportedly return None from
# Model.to(); confirm against the pinned version.
model.to("cpu")

def merge_boxes_into_lines(boxes, y_threshold=10):
    """Group ``(x1, y1, x2, y2)`` boxes into one bounding box per text row.

    Boxes are visited in ascending order of their top edge (``y1``). A box
    joins the row currently being built when its top edge is strictly less
    than ``y_threshold`` away from the top edge of the box that started that
    row; otherwise it starts a new row.

    Args:
        boxes: Sequence of 4-item boxes ``(x1, y1, x2, y2)``.
        y_threshold: Maximum (exclusive) vertical distance between top edges
            for two boxes to be treated as the same row.

    Returns:
        list: One ``[x1, y1, x2, y2]`` list per row, ordered top to bottom.
        The input boxes are not modified.
    """
    if len(boxes) == 0:
        return []

    ordered = sorted(boxes, key=lambda box: box[1])

    # The last entry of `rows` is always the row still being grown.
    rows = [list(ordered[0])]

    for left, top, right, bottom in ordered[1:]:
        row = rows[-1]

        if abs(top - row[1]) >= y_threshold:
            # Too far below the row's anchor: this box opens a new row.
            rows.append([left, top, right, bottom])
            continue

        # Same row: widen horizontally and push the bottom edge down.
        # The top edge stays as is, since boxes arrive sorted by y1.
        row[0] = min(row[0], left)
        row[2] = max(row[2], right)
        row[3] = max(row[3], bottom)

    return rows

def detect_and_ocr(image):
    """Detect text lines in an image, annotate them, and OCR each line.

    Args:
        image: Image data accepted by ``PIL.Image.fromarray`` (the Gradio
            input wired to this handler is declared with ``type="numpy"``).

    Returns:
        tuple: ``(annotated_image, text)``. ``annotated_image`` is a PIL copy
        of the input with a blue rectangle and a ``Line <idx>`` label for
        each merged text line. ``text`` is the OCR output of the lines joined
        with newlines, or a warning message explaining why there is no text.
    """
    source = Image.fromarray(image)
    annotated = source.copy()  # draw on a copy so crops come from clean pixels

    results = model.predict(source, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = results[0].boxes.xyxy.tolist()
    detected_boxes = [list(map(int, box)) for box in detected_boxes]

    merged_boxes = merge_boxes_into_lines(detected_boxes)

    # check_tesseract() launches a subprocess, so probe once per request
    # instead of once per detected line, and not at all when there is
    # nothing to OCR.
    ocr_available = bool(merged_boxes) and check_tesseract()

    draw = ImageDraw.Draw(annotated)
    extracted_text_lines = []

    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Clamp the label so it is not placed above the top edge of the image.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

        if not ocr_available:
            continue

        cropped_line = source.crop((x1, y1, x2, y2))
        try:
            ocr_text = pytesseract.image_to_string(cropped_line, lang="khm+eng").strip()
        except Exception as e:
            # Best effort per line: one failing crop must not abort the rest.
            print(f"OCR failed for line {idx}: {e}")
            continue

        if ocr_text:
            extracted_text_lines.append(ocr_text)

    if extracted_text_lines:
        full_text = "\n".join(extracted_text_lines)
    elif not merged_boxes:
        full_text = "⚠️ No text lines detected."
    elif not ocr_available:
        full_text = "⚠️ OCR not available. Showing detected lines only."
    else:
        # OCR ran but produced nothing; do not blame a missing installation.
        full_text = "⚠️ OCR returned no text. Showing detected lines only."

    return annotated, full_text

# Gradio UI: a title and subtitle, a row with two equally scaled columns
# (image upload on one side, annotated result on the other), and a text box
# for the OCR output.
with gr.Blocks() as iface:
    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Image")
            # type="numpy": the handler receives the upload as an array,
            # which detect_and_ocr converts with Image.fromarray.
            image_input = gr.Image(type="numpy", label="Upload an image")

        with gr.Column(scale=1):
            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
            # type="pil": matches the PIL image returned by detect_and_ocr.
            output_annotated = gr.Image(type="pil", label="Detected Text Lines")

    gr.Markdown("### 📝 Extracted Text (OCR Result)")
    output_text = gr.Textbox(label="Extracted Text", lines=10)

    # On upload, run detect_and_ocr on the image and send its two return
    # values to the annotated-image and text components, in that order.
    image_input.upload(
        detect_and_ocr,
        inputs=image_input,
        outputs=[output_annotated, output_text]
    )

# Start the Gradio server only when this file is executed as a script:
# bind to all network interfaces on port 7860 (per the original note, this is
# the setup intended for Hugging Face Spaces).
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)