Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,41 +4,40 @@ from PIL import Image, ImageDraw
|
|
4 |
import pytesseract
|
5 |
import subprocess
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
# Load YOLO model
|
11 |
-
YOLO_MODEL_PATH = "best.pt"
|
12 |
-
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
|
13 |
|
14 |
def check_tesseract():
    """Check whether the Tesseract OCR binary is available.

    Runs ``tesseract --version`` and prints the first line of its output.

    Returns:
        bool: True if the binary could be executed, False otherwise.
    """
    try:
        # First line of `tesseract --version` output, e.g. "tesseract 5.3.0".
        tesseract_version = subprocess.check_output(["tesseract", "--version"]).decode("utf-8").split("\n")[0]
        print(f"Tesseract Version: {tesseract_version}")
        return True
    except Exception as e:
        # Covers FileNotFoundError (binary missing) and CalledProcessError
        # (binary present but failing). NOTE: the original also printed
        # `tesseract_path`, an undefined name, whose NameError was swallowed
        # here and forced a False return even when Tesseract was installed.
        print(f"Tesseract not found: {e}")
        return False
|
25 |
|
|
|
|
|
|
|
|
|
26 |
def merge_boxes_into_lines(boxes, y_threshold=10):
|
27 |
"""Merge bounding boxes if they belong to the same text row."""
|
28 |
if len(boxes) == 0:
|
29 |
return []
|
30 |
|
31 |
-
boxes = sorted(boxes, key=lambda b: b[1])
|
32 |
merged_lines = []
|
33 |
current_line = list(boxes[0])
|
34 |
|
35 |
for i in range(1, len(boxes)):
|
36 |
x1, y1, x2, y2 = boxes[i]
|
37 |
|
38 |
-
if abs(y1 - current_line[1]) < y_threshold:
|
39 |
-
current_line[0] = min(current_line[0], x1)
|
40 |
-
current_line[2] = max(current_line[2], x2)
|
41 |
-
current_line[3] = max(current_line[3], y2)
|
42 |
else:
|
43 |
merged_lines.append(current_line)
|
44 |
current_line = list(boxes[i])
|
@@ -78,20 +77,21 @@ def detect_and_ocr(image):
|
|
78 |
|
79 |
return original_image, full_text
|
80 |
|
|
|
81 |
with gr.Blocks() as iface:
|
82 |
-
gr.Markdown("# Text Line Detection with Khmer OCR")
|
83 |
-
gr.Markdown("## Upload an image to detect text lines and extract Khmer text")
|
84 |
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
-
gr.Markdown("### Upload Image")
|
88 |
image_input = gr.Image(type="numpy", label="Upload an image")
|
89 |
|
90 |
with gr.Column(scale=1):
|
91 |
-
gr.Markdown("### Annotated Image with Bounding Boxes")
|
92 |
output_annotated = gr.Image(type="pil", label="Detected Text Lines")
|
93 |
|
94 |
-
gr.Markdown("### Extracted Text (OCR Result)")
|
95 |
output_text = gr.Textbox(label="Extracted Text", lines=10)
|
96 |
|
97 |
image_input.upload(
|
@@ -100,4 +100,6 @@ with gr.Blocks() as iface:
|
|
100 |
outputs=[output_annotated, output_text]
|
101 |
)
|
102 |
|
103 |
-
|
|
|
|
|
|
4 |
import pytesseract
|
5 |
import subprocess
|
6 |
|
7 |
+
# Ensure Tesseract OCR is installed and detected
|
8 |
+
TESSERACT_PATH = "/usr/bin/tesseract"
|
9 |
+
pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
|
|
|
|
|
|
|
10 |
|
11 |
def check_tesseract():
    """Check if Tesseract is installed and print its version."""
    try:
        # Invoke the configured binary and keep only the first output line.
        raw_output = subprocess.check_output([TESSERACT_PATH, "--version"])
        tesseract_version = raw_output.decode("utf-8").split("\n")[0]
        print(f"Tesseract Version: {tesseract_version}")
        return True
    except Exception as e:
        # Any failure (missing binary, bad exit status) is reported, not raised.
        print(f"Tesseract not found: {e}")
        return False
|
20 |
|
21 |
+
# Load YOLO model (ensure best.pt exists in the working directory)
|
22 |
+
YOLO_MODEL_PATH = "best.pt"
|
23 |
+
model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
|
24 |
+
|
25 |
def merge_boxes_into_lines(boxes, y_threshold=10):
|
26 |
"""Merge bounding boxes if they belong to the same text row."""
|
27 |
if len(boxes) == 0:
|
28 |
return []
|
29 |
|
30 |
+
boxes = sorted(boxes, key=lambda b: b[1]) # Sort by y-axis (top position)
|
31 |
merged_lines = []
|
32 |
current_line = list(boxes[0])
|
33 |
|
34 |
for i in range(1, len(boxes)):
|
35 |
x1, y1, x2, y2 = boxes[i]
|
36 |
|
37 |
+
if abs(y1 - current_line[1]) < y_threshold: # Close enough to the previous line
|
38 |
+
current_line[0] = min(current_line[0], x1) # Extend left boundary
|
39 |
+
current_line[2] = max(current_line[2], x2) # Extend right boundary
|
40 |
+
current_line[3] = max(current_line[3], y2) # Extend bottom boundary
|
41 |
else:
|
42 |
merged_lines.append(current_line)
|
43 |
current_line = list(boxes[i])
|
|
|
77 |
|
78 |
return original_image, full_text
|
79 |
|
80 |
+
# Gradio UI
|
81 |
with gr.Blocks() as iface:
|
82 |
+
gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
|
83 |
+
gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
|
84 |
|
85 |
with gr.Row():
|
86 |
with gr.Column(scale=1):
|
87 |
+
gr.Markdown("### 📤 Upload Image")
|
88 |
image_input = gr.Image(type="numpy", label="Upload an image")
|
89 |
|
90 |
with gr.Column(scale=1):
|
91 |
+
gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
|
92 |
output_annotated = gr.Image(type="pil", label="Detected Text Lines")
|
93 |
|
94 |
+
gr.Markdown("### 📝 Extracted Text (OCR Result)")
|
95 |
output_text = gr.Textbox(label="Extracted Text", lines=10)
|
96 |
|
97 |
image_input.upload(
|
|
|
100 |
outputs=[output_annotated, output_text]
|
101 |
)
|
102 |
|
103 |
+
# 🚀 Ensure the app runs properly in Hugging Face Spaces
|
104 |
+
if __name__ == "__main__":
|
105 |
+
iface.launch(server_name="0.0.0.0", server_port=7860)
|