Spaces:
Running
Running
File size: 3,871 Bytes
6d3c4df fe50f9f 6d3c4df 0827df2 6d3c4df fe50f9f 0827df2 fe50f9f 0827df2 6d3c4df fe50f9f 6d3c4df 0827df2 6d3c4df 0827df2 6d3c4df fe50f9f 6d3c4df fe50f9f b28282d fe50f9f 6d3c4df 0827df2 6d3c4df 0827df2 6d3c4df 0827df2 6d3c4df 0827df2 6d3c4df 0827df2 6d3c4df 0827df2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import gradio as gr
from ultralytics import YOLO
from PIL import Image, ImageDraw
import pytesseract
import subprocess
# Location of the Tesseract OCR executable. The path is hard-coded, so it must
# match where Tesseract is actually installed on the host.
TESSERACT_PATH = "/usr/bin/tesseract"
# Point pytesseract at that executable for every OCR call made by this module.
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
def check_tesseract():
    """Report whether the Tesseract binary at TESSERACT_PATH can be run.

    Executes ``<TESSERACT_PATH> --version`` and prints the first line of its
    output. Any failure is printed and reported as ``False`` instead of being
    raised.

    Returns:
        True if the version command ran and its output was printed,
        False otherwise.
    """
    try:
        raw_output = subprocess.check_output([TESSERACT_PATH, "--version"])
        first_line = raw_output.decode("utf-8").split("\n")[0]
        print(f"Tesseract Version: {first_line}")
    except Exception as err:
        # Best-effort probe: a missing binary, a non-zero exit status or
        # undecodable output all simply mean OCR is unavailable.
        print(f"Tesseract not found: {err}")
        return False
    return True
# Load the YOLO detection model once at import time so every request reuses it.
# The weights file best.pt must exist in the working directory.
YOLO_MODEL_PATH = "best.pt"
# task='detect' selects the detection task; .to("cpu") places the model on the CPU.
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
def merge_boxes_into_lines(boxes, y_threshold=10):
    """Group bounding boxes that sit on the same text row into one box each.

    Boxes are processed from top to bottom. A box joins the current row when
    its top edge is less than ``y_threshold`` away from the top edge of the
    box that started the row; otherwise it starts a new row.

    Args:
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
        y_threshold: Maximum (exclusive) difference between top edges for two
            boxes to be treated as the same row.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per row, ordered by top edge.
        An empty input yields an empty list.
    """
    if len(boxes) == 0:
        return []

    # Sort by top edge, so the first box of every row has the row's smallest y1.
    first_box, *remaining = sorted(boxes, key=lambda b: b[1])
    rows = [list(first_box)]

    for box in remaining:
        left, top, right, bottom = box
        row = rows[-1]
        if abs(top - row[1]) >= y_threshold:
            # Too far below the current row's top edge: start a new row.
            rows.append(list(box))
            continue
        # Same row: widen it horizontally and extend its bottom edge.
        row[0] = min(row[0], left)
        row[2] = max(row[2], right)
        row[3] = max(row[3], bottom)

    return rows
def detect_and_ocr(image):
    """Detect text lines in an image, draw them, and OCR each line.

    Args:
        image: Image data accepted by ``PIL.Image.fromarray`` (the Gradio
            input component is configured with ``type="numpy"``).

    Returns:
        A ``(annotated_image, text)`` tuple. ``annotated_image`` is a PIL
        image with a blue rectangle and a ``Line N`` label per merged text
        line. ``text`` is the OCR output of all lines joined by newlines, or
        a warning message when nothing could be extracted.
    """
    image = Image.fromarray(image)
    original_image = image.copy()

    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        list(map(int, box)) for box in results[0].boxes.xyxy.tolist()
    ]
    merged_boxes = merge_boxes_into_lines(detected_boxes)

    # Probe Tesseract once per request. The check spawns a subprocess and
    # prints the version, so running it for every detected line is wasteful.
    ocr_available = check_tesseract()

    draw = ImageDraw.Draw(original_image)
    extracted_text_lines = []
    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Clamp the label so lines near the top edge are not labelled
        # outside the canvas.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")

        if not ocr_available:
            continue

        # Crop from the clean image so the drawn annotations are not OCR'd.
        cropped_line = image.crop((x1, y1, x2, y2))
        try:
            ocr_text = pytesseract.image_to_string(
                cropped_line, lang="khm+eng"
            ).strip()
        except Exception as e:
            # Best effort: one failing line must not abort the whole request.
            print(f"OCR failed for line {idx}: {e}")
            continue
        if ocr_text:
            extracted_text_lines.append(ocr_text)

    if extracted_text_lines:
        full_text = "\n".join(extracted_text_lines)
    elif not ocr_available:
        full_text = "⚠️ OCR not available. Showing detected lines only."
    else:
        # Tesseract ran but produced nothing; do not claim it is missing.
        full_text = "⚠️ No text was recognized in the image."
    return original_image, full_text
# Gradio UI: input image on the left, annotated result on the right, and the
# OCR text output.
# NOTE(review): the indentation of this section was lost in the source; the
# nesting below (text output at the Blocks level, under the row) is a
# reconstruction — confirm against the intended layout.
with gr.Blocks() as iface:
    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📤 Upload Image")
            image_input = gr.Image(type="numpy", label="Upload an image")

        with gr.Column(scale=1):
            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
            output_annotated = gr.Image(type="pil", label="Detected Text Lines")

    gr.Markdown("### 📝 Extracted Text (OCR Result)")
    output_text = gr.Textbox(label="Extracted Text", lines=10)

    # Run detection + OCR when an image is uploaded.
    image_input.upload(detect_and_ocr, inputs=image_input, outputs=[output_annotated, output_text])
# Script entry point: listen on all interfaces at port 7860 so the app runs
# properly in Hugging Face Spaces.
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|