Norakneath committed
Commit 0827df2 · verified · 1 Parent(s): 7bb23ba

Update app.py

Files changed (1)
  1. app.py +22 -20
app.py CHANGED
@@ -4,41 +4,40 @@ from PIL import Image, ImageDraw
 import pytesseract
 import subprocess
 
-# Set Tesseract path (Ensure it works on Hugging Face Spaces)
-pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-
-# Load YOLO model
-YOLO_MODEL_PATH = "best.pt"
-model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
+# Ensure Tesseract OCR is installed and detected
+TESSERACT_PATH = "/usr/bin/tesseract"
+pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
 
 def check_tesseract():
     """Check if Tesseract is installed and print its version."""
     try:
-        tesseract_path = subprocess.check_output(["which", "tesseract"]).decode("utf-8").strip()
-        tesseract_version = subprocess.check_output(["tesseract", "--version"]).decode("utf-8").split("\n")[0]
-        print(f"Tesseract Path: {tesseract_path}")
+        tesseract_version = subprocess.check_output([TESSERACT_PATH, "--version"]).decode("utf-8").split("\n")[0]
         print(f"Tesseract Version: {tesseract_version}")
         return True
     except Exception as e:
         print(f"Tesseract not found: {e}")
         return False
 
+# Load YOLO model (ensure best.pt exists in the working directory)
+YOLO_MODEL_PATH = "best.pt"
+model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
+
 def merge_boxes_into_lines(boxes, y_threshold=10):
     """Merge bounding boxes if they belong to the same text row."""
     if len(boxes) == 0:
         return []
 
-    boxes = sorted(boxes, key=lambda b: b[1])
+    boxes = sorted(boxes, key=lambda b: b[1])  # Sort by y-axis (top position)
     merged_lines = []
     current_line = list(boxes[0])
 
     for i in range(1, len(boxes)):
         x1, y1, x2, y2 = boxes[i]
 
-        if abs(y1 - current_line[1]) < y_threshold:
-            current_line[0] = min(current_line[0], x1)
-            current_line[2] = max(current_line[2], x2)
-            current_line[3] = max(current_line[3], y2)
+        if abs(y1 - current_line[1]) < y_threshold:  # Close enough to the previous line
+            current_line[0] = min(current_line[0], x1)  # Extend left boundary
+            current_line[2] = max(current_line[2], x2)  # Extend right boundary
+            current_line[3] = max(current_line[3], y2)  # Extend bottom boundary
         else:
             merged_lines.append(current_line)
             current_line = list(boxes[i])
@@ -78,20 +77,21 @@ def detect_and_ocr(image):
 
     return original_image, full_text
 
+# Gradio UI
 with gr.Blocks() as iface:
-    gr.Markdown("# Text Line Detection with Khmer OCR")
-    gr.Markdown("## Upload an image to detect text lines and extract Khmer text")
+    gr.Markdown("# 📜 Text Line Detection with Khmer OCR")
+    gr.Markdown("## 📷 Upload an image to detect text lines and extract Khmer text")
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### Upload Image")
+            gr.Markdown("### 📤 Upload Image")
             image_input = gr.Image(type="numpy", label="Upload an image")
 
         with gr.Column(scale=1):
-            gr.Markdown("### Annotated Image with Bounding Boxes")
+            gr.Markdown("### 🖼 Annotated Image with Bounding Boxes")
             output_annotated = gr.Image(type="pil", label="Detected Text Lines")
 
-    gr.Markdown("### Extracted Text (OCR Result)")
+    gr.Markdown("### 📝 Extracted Text (OCR Result)")
     output_text = gr.Textbox(label="Extracted Text", lines=10)
 
     image_input.upload(
@@ -100,4 +100,6 @@ with gr.Blocks() as iface:
         outputs=[output_annotated, output_text]
     )
 
-iface.launch()
+# 🚀 Ensure the app runs properly in Hugging Face Spaces
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)
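Note on the Tesseract change: the commit pins TESSERACT_PATH instead of calling `which tesseract`. The snippet below is an illustrative sanity check, not part of app.py; it uses only standard calls (shutil.which, pytesseract.get_tesseract_version) and its packages.txt hint assumes the stock Hugging Face Spaces setup, where apt packages such as tesseract-ocr (and tesseract-ocr-khm for Khmer) are listed in packages.txt.

# Illustrative check (not part of this commit): verify the pinned binary
# exists before starting the app, mirroring what check_tesseract() does.
import shutil

import pytesseract

TESSERACT_PATH = "/usr/bin/tesseract"  # same constant the commit introduces

if shutil.which(TESSERACT_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH
    # get_tesseract_version() shells out to `tesseract --version` internally
    print("Tesseract version:", pytesseract.get_tesseract_version())
else:
    # On Spaces this usually means tesseract-ocr / tesseract-ocr-khm are
    # missing from packages.txt (assumption; adjust for your setup).
    print(f"No tesseract binary at {TESSERACT_PATH}")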
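Note on merge_boxes_into_lines: the added comments describe the sweep the function performs; sort boxes by their top edge, then widen the current line's box while successive tops stay within y_threshold pixels. The minimal self-contained sketch below reproduces that behaviour for illustration only; the closing append/return is assumed, since it falls outside the hunk shown above.

# Minimal sketch of the merging behaviour annotated in this commit.
# Boxes are (x1, y1, x2, y2); the final append/return is assumed.
def merge_boxes_into_lines(boxes, y_threshold=10):
    if len(boxes) == 0:
        return []

    boxes = sorted(boxes, key=lambda b: b[1])  # sort by top edge
    merged_lines = []
    current_line = list(boxes[0])

    for x1, y1, x2, y2 in boxes[1:]:
        if abs(y1 - current_line[1]) < y_threshold:  # same text row
            current_line[0] = min(current_line[0], x1)
            current_line[2] = max(current_line[2], x2)
            current_line[3] = max(current_line[3], y2)
        else:
            merged_lines.append(current_line)
            current_line = [x1, y1, x2, y2]

    merged_lines.append(current_line)  # assumed closing step
    return merged_lines

# Two detections on one row plus one on the next row collapse to two lines:
print(merge_boxes_into_lines([(10, 20, 60, 40), (70, 22, 150, 41), (12, 80, 90, 100)]))
# -> [[10, 20, 150, 41], [12, 80, 90, 100]]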