Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,41 +4,40 @@ from PIL import Image, ImageDraw
|
|
4 |
import pytesseract
|
5 |
import subprocess
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
# Load YOLO model
|
11 |
-
YOLO_MODEL_PATH = "best.pt"
|
12 |
-
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
|
13 |
|
14 |
def check_tesseract():
    """Check whether the Tesseract OCR binary is available.

    Runs ``tesseract --version`` and prints the first line of its output.

    Returns:
        bool: True if the binary could be executed, False otherwise.
    """
    try:
        # First line of `tesseract --version` output, e.g. "tesseract 5.3.0".
        tesseract_version = subprocess.check_output(["tesseract", "--version"]).decode("utf-8").split("\n")[0]
        print(f"Tesseract Version: {tesseract_version}")
        return True
    except Exception as e:
        # Covers FileNotFoundError (binary missing) and CalledProcessError
        # (binary present but failing). NOTE: the original also printed
        # `tesseract_path`, an undefined name, whose NameError was swallowed
        # here and forced a False return even when Tesseract was installed.
        print(f"Tesseract not found: {e}")
        return False
|
25 |
|
|
|
|
|
|
|
|
|
26 |
def merge_boxes_into_lines(boxes, y_threshold=10):
|
27 |
"""Merge bounding boxes if they belong to the same text row."""
|
28 |
if len(boxes) == 0:
|
29 |
return []
|
30 |
|
31 |
-
boxes = sorted(boxes, key=lambda b: b[1])
|
32 |
merged_lines = []
|
33 |
current_line = list(boxes[0])
|
34 |
|
35 |
for i in range(1, len(boxes)):
|
36 |
x1, y1, x2, y2 = boxes[i]
|
37 |
|
38 |
-
if abs(y1 - current_line[1]) < y_threshold:
|
39 |
-
current_line[0] = min(current_line[0], x1)
|
40 |
-
current_line[2] = max(current_line[2], x2)
|
41 |
-
current_line[3] = max(current_line[3], y2)
|
42 |
else:
|
43 |
merged_lines.append(current_line)
|
44 |
current_line = list(boxes[i])
|
@@ -78,20 +77,21 @@ def detect_and_ocr(image):
|
|
78 |
|
79 |
return original_image, full_text
|
80 |
|
|
|
81 |
with gr.Blocks() as iface:
|
82 |
-
gr.Markdown("# Text Line Detection with Khmer OCR")
|
83 |
-
gr.Markdown("## Upload an image to detect text lines and extract Khmer text")
|
84 |
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
-
gr.Markdown("### Upload Image")
|
88 |
image_input = gr.Image(type="numpy", label="Upload an image")
|
89 |
|
90 |
with gr.Column(scale=1):
|
91 |
-
gr.Markdown("### Annotated Image with Bounding Boxes")
|
92 |
output_annotated = gr.Image(type="pil", label="Detected Text Lines")
|
93 |
|
94 |
-
gr.Markdown("### Extracted Text (OCR Result)")
|
95 |
output_text = gr.Textbox(label="Extracted Text", lines=10)
|
96 |
|
97 |
image_input.upload(
|
@@ -100,4 +100,6 @@ with gr.Blocks() as iface:
|
|
100 |
outputs=[output_annotated, output_text]
|
101 |
)
|
102 |
|
103 |
-
|
|
|
|
|
|
4 |
import pytesseract
|
5 |
import subprocess
|
6 |
|
7 |
+
# Ensure Tesseract OCR is installed and detected
|
8 |
+
TESSERACT_PATH = "/usr/bin/tesseract"
|
9 |
+
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
|
|
|
|
|
|
|
10 |
|
11 |
def check_tesseract():
    """Check if Tesseract is installed and print its version."""
    try:
        # Invoke the configured binary and keep only the first output line.
        raw_output = subprocess.check_output([TESSERACT_PATH, "--version"])
        tesseract_version = raw_output.decode("utf-8").split("\n")[0]
        print(f"Tesseract Version: {tesseract_version}")
        return True
    except Exception as e:
        # Any failure (missing binary, bad exit status) is reported, not raised.
        print(f"Tesseract not found: {e}")
        return False
|
20 |
|
21 |
+
# Load YOLO model (ensure best.pt exists in the working directory)
|
22 |
+
YOLO_MODEL_PATH = "best.pt"
|
23 |
+
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
|
24 |
+
|
25 |
def merge_boxes_into_lines(boxes, y_threshold=10):
|
26 |
"""Merge bounding boxes if they belong to the same text row."""
|
27 |
if len(boxes) == 0:
|
28 |
return []
|
29 |
|
30 |
+
boxes = sorted(boxes, key=lambda b: b[1]) # Sort by y-axis (top position)
|
31 |
merged_lines = []
|
32 |
current_line = list(boxes[0])
|
33 |
|
34 |
for i in range(1, len(boxes)):
|
35 |
x1, y1, x2, y2 = boxes[i]
|
36 |
|
37 |
+
if abs(y1 - current_line[1]) < y_threshold: # Close enough to the previous line
|
38 |
+
current_line[0] = min(current_line[0], x1) # Extend left boundary
|
39 |
+
current_line[2] = max(current_line[2], x2) # Extend right boundary
|
40 |
+
current_line[3] = max(current_line[3], y2) # Extend bottom boundary
|
41 |
else:
|
42 |
merged_lines.append(current_line)
|
43 |
current_line = list(boxes[i])
|
|
|
77 |
|
78 |
return original_image, full_text
|
79 |
|
80 |
+
# Gradio UI
|
81 |
with gr.Blocks() as iface:
|
82 |
+
gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
|
83 |
+
gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
|
84 |
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
+
gr.Markdown("### 📤 Upload Image")
|
88 |
image_input = gr.Image(type="numpy", label="Upload an image")
|
89 |
|
90 |
with gr.Column(scale=1):
|
91 |
+
gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
|
92 |
output_annotated = gr.Image(type="pil", label="Detected Text Lines")
|
93 |
|
94 |
+
gr.Markdown("### 📝 Extracted Text (OCR Result)")
|
95 |
output_text = gr.Textbox(label="Extracted Text", lines=10)
|
96 |
|
97 |
image_input.upload(
|
|
|
100 |
outputs=[output_annotated, output_text]
|
101 |
)
|
102 |
|
103 |
+
# 🚀 Ensure the app runs properly in Hugging Face Spaces
|
104 |
+
if __name__ == "__main__":
|
105 |
+
iface.launch(server_name="0.0.0.0", server_port=7860)
|