Spaces:

Norakneath
/

TestingYolo

Running

App Files Files Community

Norakneath committed on Feb 12

Commit

6d3c4df

verified ·

1 Parent(s): fa2b0fb

Create app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

import os

import gradio as gr
from ultralytics import YOLO
from PIL import Image, ImageDraw
import pytesseract

# Tesseract location on Hugging Face Spaces. Only override pytesseract's
# default command (a "tesseract" executable resolved via PATH) when this
# binary actually exists, so the app also works on machines where Tesseract
# is installed somewhere else.
SPACES_TESSERACT_CMD = "/usr/bin/tesseract"
if os.path.isfile(SPACES_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = SPACES_TESSERACT_CMD

# Detection weights file; the model is loaded once at import time and placed
# on the CPU.
YOLO_MODEL_PATH = "best.pt"
model = YOLO(YOLO_MODEL_PATH, task="detect").to("cpu")
def merge_boxes_into_lines(boxes, y_threshold=10):
    """Merge bounding boxes that sit on the same text line.

    Boxes are processed in order of their top edge (``y1``). A box joins the
    current line when its top edge is strictly closer than ``y_threshold``
    to the top edge of the box that started that line; otherwise it starts a
    new line. Joining widens the line horizontally and extends its bottom
    edge; the line's top edge stays that of its first box.

    Args:
        boxes: Iterable of ``(x1, y1, x2, y2)`` boxes. Any iterable is
            accepted (list, tuple, generator); the input is not modified.
        y_threshold: Exclusive upper bound on the top-edge distance for two
            boxes to be treated as one line, in the same units as the box
            coordinates.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per merged line, ordered
        from top to bottom. Empty when ``boxes`` is empty.
    """
    # Sort before the emptiness check so inputs without len() also work.
    ordered = sorted(boxes, key=lambda box: box[1])
    if not ordered:
        return []

    first, *rest = ordered
    merged_lines = []
    current_line = list(first)  # copy so the caller's box is never mutated
    for x1, y1, x2, y2 in rest:
        if abs(y1 - current_line[1]) < y_threshold:
            # Same line: grow the running box to enclose this one.
            current_line[0] = min(current_line[0], x1)
            current_line[2] = max(current_line[2], x2)
            current_line[3] = max(current_line[3], y2)
        else:
            merged_lines.append(current_line)
            current_line = [x1, y1, x2, y2]
    merged_lines.append(current_line)
    return merged_lines
def detect_and_ocr(image):
    """Detect text regions, merge them into lines, and OCR each line.

    The image is run through the module-level YOLO ``model``; the detected
    boxes are grouped with ``merge_boxes_into_lines``; every resulting line
    is outlined and labelled on a copy of the image and cropped from the
    untouched image for Tesseract OCR.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``, or ``None`` when
            no image is available.

    Returns:
        A ``(annotated_image, text)`` tuple: the PIL image with line boxes
        drawn on it, and the non-empty OCR results joined by newlines.
        ``(None, "")`` is returned when ``image`` is ``None``.
    """
    if image is None:
        # Nothing to process; avoid crashing inside Image.fromarray.
        return None, ""

    image = Image.fromarray(image)
    # Draw on a copy so OCR crops never contain the blue annotations.
    annotated_image = image.copy()

    results = model.predict(image, conf=0.3, iou=0.5, device="cpu")
    detected_boxes = [
        [int(value) for value in box]
        for box in results[0].boxes.xyxy.tolist()
    ]
    merged_boxes = merge_boxes_into_lines(detected_boxes)

    draw = ImageDraw.Draw(annotated_image)
    extracted_text_lines = []
    for idx, (x1, y1, x2, y2) in enumerate(merged_boxes):
        if x2 <= x1 or y2 <= y1:
            # Integer truncation can collapse a box to zero area; such a box
            # has no pixels to crop or OCR.
            continue
        draw.rectangle([x1, y1, x2, y2], outline="blue", width=2)
        # Keep the label inside the image for boxes near the top edge.
        draw.text((x1, max(y1 - 10, 0)), f"Line {idx}", fill="blue")
        cropped_line = image.crop((x1, y1, x2, y2))
        ocr_text = pytesseract.image_to_string(cropped_line, lang="eng").strip()
        if ocr_text:
            extracted_text_lines.append(ocr_text)

    full_text = "\n".join(extracted_text_lines)
    return annotated_image, full_text
# Gradio UI: the uploaded image sits next to the annotated result, with the
# OCR text in a textbox underneath.
with gr.Blocks() as iface:
    gr.Markdown("# Text Line Detection with OCR")
    gr.Markdown("## Upload an image to detect text lines and extract text")

    with gr.Row():
        # Left column: input image.
        with gr.Column(scale=1):
            gr.Markdown("### Upload Image")
            image_input = gr.Image(type="numpy", label="Upload an image")
        # Right column: image with the detected line boxes drawn on it.
        with gr.Column(scale=1):
            gr.Markdown("### Annotated Image with Bounding Boxes")
            output_annotated = gr.Image(type="pil", label="Detected Text Lines")

    gr.Markdown("### Extracted Text (OCR Result)")
    output_text = gr.Textbox(label="Extracted Text", lines=10)

    # Run detection and OCR whenever a new image is uploaded.
    image_input.upload(
        fn=detect_and_ocr,
        inputs=[image_input],
        outputs=[output_annotated, output_text],
    )

iface.launch()